Du kan inte välja fler än 25 ämnen. Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

validate_links.py 1.4 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. #!/usr/bin/env python3
  2. import httplib2
  3. import re
  4. import socket
  5. import sys
  6. def parse_links(filename):
  7. """Returns a list of URLs from text file"""
  8. with open(filename) as fp:
  9. data = fp.read()
  10. raw_links = re.findall(
  11. 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
  12. data)
  13. links = [raw_link.replace(')', '') for raw_link in raw_links]
  14. return links
  15. def validate_links(links):
  16. """Checks each entry in JSON file for live link"""
  17. print('Validating {} links...'.format(len(links)))
  18. errors = []
  19. for link in links:
  20. h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
  21. try:
  22. resp = h.request(link, 'HEAD')
  23. code = int(resp[0]['status'])
  24. # check if status code is a client or server error
  25. if code >= 404:
  26. errors.append('{}: {}'.format(code, link))
  27. except TimeoutError:
  28. errors.append("TMO: " + link)
  29. except socket.error as socketerror:
  30. errors.append("SOC: {} : {}".format(socketerror, link))
  31. return errors
  32. if __name__ == "__main__":
  33. num_args = len(sys.argv)
  34. if num_args < 2:
  35. print("No .md file passed")
  36. sys.exit(1)
  37. errors = validate_links(parse_links(sys.argv[1]))
  38. if len(errors) > 0:
  39. for err in errors:
  40. print(err)
  41. sys.exit(1)