Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

validate_links.py 2.0 KiB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253
  1. #!/usr/bin/env python3
  2. import httplib2
  3. import re
  4. import socket
  5. import sys
  # URL matcher: accepts links that start with http(s)://, "www.", or a bare
  # "domain.tld/" prefix, allows up to two levels of balanced parentheses in
  # the remainder, and refuses to end the match on trailing punctuation.
  # Written as a raw string so the regex escapes (\d, \s, \( ...) are not
  # parsed as (invalid) string escapes; compiled once instead of per call.
  _URL_PATTERN = re.compile(
      r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))'
  )


  def parse_links(filename):
      """Return every URL found in a text file, in order of appearance.

      Args:
          filename: Path of the text file to scan.

      Returns:
          A list of URL strings. Duplicates are kept; the list is empty when
          the file contains no link.

      Raises:
          OSError: If the file cannot be opened or read.
          UnicodeDecodeError: If the file is not valid UTF-8.
      """
      # Decode explicitly as UTF-8 instead of relying on the platform's locale
      # encoding, so the same file yields the same links on every machine.
      with open(filename, encoding="utf-8") as fp:
          data = fp.read()

      # The pattern has several capture groups, so findall() yields tuples;
      # the first group is the complete URL.
      return [groups[0] for groups in _URL_PATTERN.findall(data)]
  # Fragments of exception messages that are treated as "not actually an
  # error" for the purpose of link checking. Extend this tuple if further
  # benign failures need to be ignored.
  _IGNORED_ERROR_FRAGMENTS = (
      "Content purported to be compressed with gzip but failed to decompress.",
      "[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)",
  )


  def validate_links(links):
      """Request every link and collect a description of each failure.

      Args:
          links: List of URL strings to check.

      Returns:
          A list of human-readable error strings, one per failing link, in
          one of these shapes:
            "<status>: <link>"        HTTP status of 404 or above
            "TMO: <link>"             the request timed out
            "SOC: <error> : <link>"   other socket-level failure
            "ERR: <error> : <link>"   any other unexpected exception
          The list is empty when no link failed.
      """
      print('Validating {} links...'.format(len(links)))
      errors = []
      for link in links:
          # A fresh client is created per link, with certificate validation
          # disabled and a 25 second timeout.
          h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
          try:
              resp = h.request(link, headers={'user-agent': 'python-httplib2/0.18.0'})
              code = int(resp[0]['status'])
              # Only statuses of 404 and above are reported; 400-403 pass.
              # NOTE(review): presumably deliberate (e.g. sites answering 403
              # to scripted clients) -- confirm before lowering the threshold.
              if code >= 404:
                  errors.append('{}: {}'.format(code, link))
          except (TimeoutError, socket.timeout):
              # socket.timeout only became an alias of TimeoutError in Python
              # 3.10; naming both keeps timeouts labelled "TMO" everywhere.
              errors.append("TMO: " + link)
          except socket.error as socketerror:
              errors.append("SOC: {} : {}".format(socketerror, link))
          except Exception as e:
              # Record the failure unless its message matches one of the
              # known-benign fragments. (Previously the test was inverted and
              # required BOTH fragments to be present, which dropped nearly
              # every unexpected exception silently.)
              message = str(e)
              if not any(fragment in message for fragment in _IGNORED_ERROR_FRAGMENTS):
                  errors.append("ERR: {} : {}".format(e, link))
      return errors
  if __name__ == "__main__":
      # The path of the file to scan is expected as the first CLI argument.
      if len(sys.argv) < 2:
          print("No .md file passed")
          sys.exit(1)

      errors = validate_links(parse_links(sys.argv[1]))
      if errors:
          # Emit one failure per line, then exit non-zero so callers
          # (e.g. a CI job) can detect that at least one link is broken.
          print("\n".join(errors))
          sys.exit(1)