From 7f7a3906e57b2c3c8908badbf4d9578716cf1472 Mon Sep 17 00:00:00 2001 From: Yann Bertrand <5855339+yannbertrand@users.noreply.github.com> Date: Thu, 19 Nov 2020 21:50:32 +0100 Subject: [PATCH] Faking user agent + handle "no server" and fallback error --- build/validate_links.py | 42 +++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/build/validate_links.py b/build/validate_links.py index 192e74fe..ab36b0c8 100755 --- a/build/validate_links.py +++ b/build/validate_links.py @@ -19,35 +19,45 @@ def parse_links(filename): def validate_links(links): """Checks each entry in JSON file for live link""" - print('Validating {} links...'.format(len(links))) - errors = [] + print(f'Validating {len(links)} links...') + hasError = False for link in links: h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25) try: - resp = h.request(link, headers={'user-agent': 'python-httplib2/0.18.0'}) + resp = h.request(link, headers={ + # Faking user agent as some hosting services block not-whitelisted UA + 'user-agent': 'Mozilla/5.0' + }) code = int(resp[0]['status']) - # check if status code is a client or server error - if code >= 404: - errors.append('{}: {}'.format(code, link)) + # Checking status code errors + if (code >= 300): + hasError = True + print(f"ERR:CLT:{code} : {link}") except TimeoutError: - errors.append("TMO: " + link) + hasError = True + print(f"ERR:TMO: {link}") except socket.error as socketerror: - errors.append("SOC: {} : {}".format(socketerror, link)) + hasError = True + print(f"ERR:SOC: {socketerror} : {link}") except Exception as e: + hasError = True # Ignore some exceptions which are not actually errors. # The list below should be extended with other exceptions in the future if needed - if ((-1 != str(e).find("Content purported to be compressed with gzip but failed to decompress.")) and - (-1 != str(e).find("[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)"))) : - errors.append("ERR: {} : {}".format(e, link)) - return errors + if (-1 != str(e).find("[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)")): + print(f"ERR:SSL: {e} : {link}") + elif (-1 != str(e).find("Content purported to be compressed with gzip but failed to decompress.")): + print(f"ERR:GZP: {e} : {link}") + elif (-1 != str(e).find("Unable to find the server at")): + print(f"ERR:SRV: {e} : {link}") + else: + print(f"ERR:UKN: {e} : {link}") + return hasError if __name__ == "__main__": num_args = len(sys.argv) if num_args < 2: print("No .md file passed") sys.exit(1) - errors = validate_links(parse_links(sys.argv[1])) - if len(errors) > 0: - for err in errors: - print(err) + hasError = validate_links(parse_links(sys.argv[1])) + if hasError: sys.exit(1)