def parse_links(filename):
    """Return the list of URLs found in a text file.

    The file is read as UTF-8.  Only the text from the first ``## Index``
    heading onward is scanned; when that heading is absent the whole file
    is scanned instead.  Trailing slashes are stripped from every URL.

    Args:
        filename: Path of the text file to scan.

    Returns:
        A list of URL strings in order of appearance (duplicates are kept).
    """
    with open(filename, mode='r', encoding='utf-8') as fp:
        readme = fp.read()

    # str.find() returns -1 when the heading is missing; slicing from -1
    # would keep only the last character and silently drop every link,
    # so fall back to the full text in that case.
    index_section = readme.find('## Index')
    content = readme[index_section:] if index_section != -1 else readme

    # Raw string: the pattern text is unchanged, but backslash sequences
    # such as \d and \s are no longer parsed as (invalid) string escapes.
    raw_links = re.findall(
        r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))',
        content)

    # The pattern contains several groups, so findall() yields tuples;
    # element 0 is the outermost group, i.e. the complete URL.
    return [raw_link[0].rstrip('/') for raw_link in raw_links]