From 3979c871cdd54f0a8679ff20217fe50c613d8a31 Mon Sep 17 00:00:00 2001 From: Dmytro Khmelenko Date: Fri, 22 Oct 2021 02:48:05 +0200 Subject: [PATCH] Set host header for link validator (#2409) --- build/validate_links.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/build/validate_links.py b/build/validate_links.py index 9bb4405b..bd8446c3 100755 --- a/build/validate_links.py +++ b/build/validate_links.py @@ -51,9 +51,16 @@ def validate_links(links): for link in links: h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25) try: + # fetching host name, removing leading www + host = link.split('//', 1)[1].split('/', 1)[0] + if host[:3] == 'www': + host = host[4:] + resp = h.request(link, headers={ # Faking user agent as some hosting services block not-whitelisted UA - 'user-agent': 'Mozilla/5.0' + 'user-agent': 'Mozilla/5.0', + # setting host because Cloudflare returns 403 asking for captcha if host is missing + 'host': host }) code = int(resp[0]['status']) # Checking status code errors