diff --git a/build/validate_format.py b/build/validate_format.py
index 22fca30a..545fea2c 100755
--- a/build/validate_format.py
+++ b/build/validate_format.py
@@ -20,7 +20,6 @@ num_segments = 5
 
 errors = []
 title_links = []
-previous_links = []
 anchor_re = re.compile(anchor + '\s(.+)')
 section_title_re = re.compile('\*\s\[(.*)\]')
 link_re = re.compile('\[(.+)\]\((http.*)\)')
@@ -68,12 +67,6 @@ def check_entry(line_num, segments):
         title = title_re_match.group(1)
         if title.upper().endswith(' API'):
             add_error(line_num, 'Title should not end with "... API". Every entry is an API here!')
-        # do not allow duplicate links
-        link = title_re_match.group(2)
-        if link in previous_links:
-            add_error(line_num, 'Duplicate link - entries should only be included in one section')
-        else:
-            previous_links.append(link)
     # END Title
     # START Description
     # first character should be capitalized
diff --git a/build/validate_links.py b/build/validate_links.py
index ab36b0c8..eb5dfa30 100755
--- a/build/validate_links.py
+++ b/build/validate_links.py
@@ -5,6 +5,12 @@ import re
 import socket
 import sys
 
+ignored_links = [
+    'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Run+tests%22',
+    'https://github.com/public-apis/public-apis/workflows/Validate%20links/badge.svg?branch=master',
+    'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Validate+links%22',
+    'https://github.com/davemachado/public-api',
+]
 
 def parse_links(filename):
     """Returns a list of URLs from text file"""
@@ -16,6 +22,33 @@ def parse_links(filename):
     links = [raw_link[0] for raw_link in raw_links]
     return links
 
+def dup_links(links):
+    """Check for duplicated links.
+
+    Trailing slashes are stripped before comparing and entries listed in
+    ignored_links are skipped. Returns True when any duplicate is found.
+    """
+    print('Checking for duplicated links...')
+    hasError = False
+    seen = {}
+    dupes = []
+
+    for link in links:
+        link = link.rstrip('/')
+        if link in ignored_links:
+            continue
+
+        # Count occurrences so a link repeated 3+ times is reported once.
+        seen[link] = seen.get(link, 0) + 1
+        if seen[link] == 2:
+            dupes.append(link)
+
+    if not dupes:
+        print("No duplicate links")
+    else:
+        print(f"Found duplicate links: {dupes}")
+        hasError = True
+    return hasError
 
 def validate_links(links):
     """Checks each entry in JSON file for live link"""
@@ -58,6 +91,9 @@ if __name__ == "__main__":
     if num_args < 2:
         print("No .md file passed")
         sys.exit(1)
-    hasError = validate_links(parse_links(sys.argv[1]))
+    links = parse_links(sys.argv[1])
+    hasError = dup_links(links)
+    if not hasError:
+        hasError = validate_links(links)
     if hasError:
         sys.exit(1)