|
|
@@ -19,35 +19,45 @@ def parse_links(filename): |
|
|
|
|
|
|
|
# Substrings of exception messages mapped to the short tag printed for them.
# Entries are checked in order; extend this table when other recurring
# failures need their own tag.
_KNOWN_ERROR_TAGS = (
    # Matched without the "(_ssl.c:NNN)" suffix, whose line number differs
    # between Python builds.
    ("[SSL: CERTIFICATE_VERIFY_FAILED]", "SSL"),
    ("Content purported to be compressed with gzip but failed to decompress.", "GZP"),
    ("Unable to find the server at", "SRV"),
)


def _error_tag(message):
    """Return the tag for a known exception message, or 'UKN' if unrecognised."""
    for needle, tag in _KNOWN_ERROR_TAGS:
        if needle in message:
            return tag
    return "UKN"


def validate_links(links):
    """Request every link and report the ones that fail.

    Each link is fetched with its own ``httplib2.Http`` client (25 second
    timeout, certificate validation disabled). A failure is printed as one
    line starting with ``ERR:<TAG>`` where the tag is:

    * ``CLT`` - the response status code was 300 or above
    * ``TMO`` - the request raised ``TimeoutError``
    * ``SOC`` - the request raised ``socket.error``
    * ``SSL`` / ``GZP`` / ``SRV`` / ``UKN`` - any other exception, classified
      by its message

    Args:
        links: iterable of URL strings; it must support ``len()``.

    Returns:
        True if at least one link failed, False otherwise (including when
        ``links`` is empty).
    """
    print(f'Validating {len(links)} links...')
    has_error = False

    for link in links:
        h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25)
        try:
            resp = h.request(link, headers={
                # Faking user agent as some hosting services block not-whitelisted UA
                'user-agent': 'Mozilla/5.0'
            })
            code = int(resp[0]['status'])
            # Checking status code errors
            if code >= 300:
                has_error = True
                print(f"ERR:CLT:{code} : {link}")
        except TimeoutError:
            # Must precede socket.error: TimeoutError is a subclass of OSError,
            # which socket.error aliases.
            has_error = True
            print(f"ERR:TMO: {link}")
        except socket.error as socketerror:
            has_error = True
            print(f"ERR:SOC: {socketerror} : {link}")
        except Exception as e:
            # Keep going on unexpected failures so one bad link does not stop
            # the remaining links from being checked.
            has_error = True
            print(f"ERR:{_error_tag(str(e))}: {e} : {link}")

    return has_error
|
|
|
|
|
|
|
if __name__ == "__main__":
    # The file to check is expected as the first command-line argument.
    if len(sys.argv) < 2:
        print("No .md file passed")
        sys.exit(1)

    # Validate once only: every call re-requests all links over the network.
    # A truthy result means at least one link failed, so exit non-zero.
    if validate_links(parse_links(sys.argv[1])):
        sys.exit(1)