From c2bdd9e5fc4f00807254d9d2123e985dce555a3c Mon Sep 17 00:00:00 2001 From: Matheus Felipe <50463866+matheusfelipeog@users.noreply.github.com> Date: Mon, 7 Feb 2022 07:09:01 -0300 Subject: [PATCH] Fix false negative http code 404 in verification Some links returned HTTP 404 when they were checked, even though the links were working correctly. This happened because a / character was appended to the link before the request, making it a different URL from the original. If the original link did not serve a path ending in /, the request returned a 404 error. This behavior made the check report a false negative. --- scripts/validate/links.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/scripts/validate/links.py b/scripts/validate/links.py index a5172b8b..9f808871 100644 --- a/scripts/validate/links.py +++ b/scripts/validate/links.py @@ -17,7 +17,7 @@ def find_links_in_text(text: str) -> List[str]: raw_links = re.findall(link_pattern, text) links = [ - str(raw_link[0]).rstrip('/') for raw_link in raw_links + str(raw_link[0]) for raw_link in raw_links ] return links @@ -49,6 +49,7 @@ def check_duplicate_links(links: List[str]) -> Tuple[bool, List]: has_duplicate = False for link in links: + link = link.rstrip('/') if link not in seen: seen[link] = 1 else: @@ -163,7 +164,7 @@ def check_if_link_is_working(link: str) -> Tuple[bool, str]: error_message = '' try: - resp = requests.get(link + '/', timeout=25, headers={ + resp = requests.get(link, timeout=25, headers={ 'User-Agent': fake_user_agent(), 'host': get_host_from_link(link) })