From 97cb773f4517dfa04fa3b49b373b39e9461388dd Mon Sep 17 00:00:00 2001 From: Marek Dano Date: Sat, 20 Mar 2021 09:36:33 +0000 Subject: [PATCH 1/3] Add check for checking duplicated links in README file Closes #1592 --- build/validate_links.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/build/validate_links.py b/build/validate_links.py index ab36b0c8..c4eafce9 100755 --- a/build/validate_links.py +++ b/build/validate_links.py @@ -5,6 +5,11 @@ import re import socket import sys +ignored_links = [ + 'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Run+tests%22', + 'https://github.com/public-apis/public-apis/workflows/Validate%20links/badge.svg?branch=master', + 'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Validate+links%22', +] def parse_links(filename): """Returns a list of URLs from text file""" @@ -16,6 +21,29 @@ def parse_links(filename): links = [raw_link[0] for raw_link in raw_links] return links +def dup_links(links): + """Check for duplicated links""" + print(f'Checking for duplicated links...') + hasError = False + seen = {} + dupes = [] + + for x in links: + if x in ignored_links: + continue + + if x not in seen: + seen[x] = 1 + else: + if seen[x] == 1: + dupes.append(x) + + if not dupes: + print(f"No duplicated links") + else: + print(f"Found duplicated links: {dupes}") + hasError = True + return hasError def validate_links(links): """Checks each entry in JSON file for live link""" @@ -58,6 +86,9 @@ if __name__ == "__main__": if num_args < 2: print("No .md file passed") sys.exit(1) - hasError = validate_links(parse_links(sys.argv[1])) + links = parse_links(sys.argv[1]) + hasError = dup_links(links) + if not hasError: + hasError = validate_links(links) if hasError: sys.exit(1) From 9722487f4699525a33d609a6ab24d6d6d44f6fac Mon Sep 17 00:00:00 2001 From: Marek Dano Date: Sun, 21 Mar 2021 07:58:56 +0000 Subject: [PATCH 2/3] Add ignore link and strip url links before checking it --- build/validate_links.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/build/validate_links.py b/build/validate_links.py index c4eafce9..eb5dfa30 100755 --- a/build/validate_links.py +++ b/build/validate_links.py @@ -9,6 +9,7 @@ ignored_links = [ 'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Run+tests%22', 'https://github.com/public-apis/public-apis/workflows/Validate%20links/badge.svg?branch=master', 'https://github.com/public-apis/public-apis/actions?query=workflow%3A%22Validate+links%22', + 'https://github.com/davemachado/public-api', ] def parse_links(filename): @@ -28,20 +29,21 @@ def dup_links(links): seen = {} dupes = [] - for x in links: - if x in ignored_links: + for link in links: + link = link.rstrip('/') + if link in ignored_links: continue - if x not in seen: - seen[x] = 1 + if link not in seen: + seen[link] = 1 else: - if seen[x] == 1: - dupes.append(x) + if seen[link] == 1: + dupes.append(link) if not dupes: - print(f"No duplicated links") + print(f"No duplicate links") else: - print(f"Found duplicated links: {dupes}") + print(f"Found duplicate links: {dupes}") hasError = True return hasError From ed25ea30ac9ada949d553326576cd2b4b6b2f751 Mon Sep 17 00:00:00 2001 From: Marek Dano Date: Tue, 23 Mar 2021 18:04:31 +0000 Subject: [PATCH 3/3] Remove duplicate check of links in validate_format.py --- build/validate_format.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/build/validate_format.py b/build/validate_format.py index 22fca30a..545fea2c 100755 --- a/build/validate_format.py +++ b/build/validate_format.py @@ -20,7 +20,6 @@ num_segments = 5 errors = [] title_links = [] -previous_links = [] anchor_re = re.compile(anchor + '\s(.+)') section_title_re = re.compile('\*\s\[(.*)\]') link_re = re.compile('\[(.+)\]\((http.*)\)') @@ -68,12 +67,6 @@ def check_entry(line_num, segments): title = title_re_match.group(1) if title.upper().endswith(' API'): add_error(line_num, 'Title should not end with "... API". Every entry is an API here!') - # do not allow duplicate links - link = title_re_match.group(2) - if link in previous_links: - add_error(line_num, 'Duplicate link - entries should only be included in one section') - else: - previous_links.append(link) # END Title # START Description # first character should be capitalized