You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

50 lines
1.5 KiB

  1. #!/usr/bin/env python3
  2. import httplib2
  3. import re
  4. import socket
  5. import sys
  6. def parse_links(filename):
  7. """Returns a list of URLs from text file"""
  8. with open(filename) as fp:
  9. data = fp.read()
  10. raw_links = re.findall(
  11. 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+',
  12. data)
  13. links = [raw_link.replace(')', '') for raw_link in raw_links]
  14. return links
  15. def validate_links(links):
  16. """Checks each entry in JSON file for live link"""
  17. print('Validating {} links...'.format(len(links)))
  18. errors = []
  19. for link in links:
  20. h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
  21. try:
  22. resp = h.request(link, 'HEAD')
  23. code = int(resp[0]['status'])
  24. # check if status code is a client or server error
  25. if code >= 404:
  26. errors.append('{}: {}'.format(code, link))
  27. except TimeoutError:
  28. errors.append("TMO: " + link)
  29. except socket.error as socketerror:
  30. errors.append("SOC: {} : {}".format(socketerror, link))
  31. except Exception as e:
  32. errors.append("ERR: {} : {}".format(e, link))
  33. return errors
  34. if __name__ == "__main__":
  35. num_args = len(sys.argv)
  36. if num_args < 2:
  37. print("No .md file passed")
  38. sys.exit(1)
  39. errors = validate_links(parse_links(sys.argv[1]))
  40. if len(errors) > 0:
  41. for err in errors:
  42. print(err)
  43. sys.exit(1)