diff --git a/build/github-pull.sh b/build/github-pull.sh deleted file mode 100755 index 4c5c3a4b..00000000 --- a/build/github-pull.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/env bash - -set -e - -# Argument validation -if [ $# -ne 3 ]; then - echo "Usage: $0 " - exit 1 -fi - -# Assign variables -GITHUB_REPOSITORY="$1" -GITHUB_PULL_REQUEST="$2" -FORMAT_FILE="$3" - -# Move to root of project -cd "$GITHUB_WORKSPACE" - -# Determine files -FORMAT_FILE="$( realpath "${FORMAT_FILE}" )" - -# Skip if build number could not be determined -if [ -z "$GITHUB_REPOSITORY" -o -z "$GITHUB_PULL_REQUEST" ]; then - echo "No pull request and/or repository is provided" - exit 1 -fi - -# Pull changes on PR -echo "running on Pull Request #$GITHUB_PULL_REQUEST" - -# Trick the URL validator python script into not seeing this as a URL -DUMMY_SCHEME="https" -DIFF_URL="$DUMMY_SCHEME://patch-diff.githubusercontent.com/raw/$GITHUB_REPOSITORY/pull/$GITHUB_PULL_REQUEST.diff" -curl -L -o diff.txt "$DIFF_URL" - -# Construct diff -echo "------- BEGIN DIFF -------" -cat diff.txt -echo "-------- END DIFF --------" -cat diff.txt | egrep "\+" > additions.txt - -echo "------ BEGIN ADDITIONS -----" -cat additions.txt -echo "------- END ADDITIONS ------" -LINK_FILE=additions.txt - -# Validate links -echo "Running link validation..." -./build/validate_links.py "$LINK_FILE" - -# Vebosity -if [[ $? != 0 ]]; then - echo "link validation failed!" - exit 1 -else - echo "link validation passed!" -fi diff --git a/build/requirements.txt b/build/requirements.txt deleted file mode 100644 index ebed23a8..00000000 --- a/build/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -flake8>=3.5.0 -httplib2==0.19.0 diff --git a/build/validate_format.py b/build/validate_format.py deleted file mode 100755 index 4610f379..00000000 --- a/build/validate_format.py +++ /dev/null @@ -1,168 +0,0 @@ -#!/usr/bin/env python3 - -import re -import sys - -anchor = '###' -min_entries_per_section = 3 -auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'No', 'User-Agent'] -punctuation = ['.', '?', '!'] -https_keys = ['Yes', 'No'] -cors_keys = ['Yes', 'No', 'Unknown'] - -index_title = 0 -index_desc = 1 -index_auth = 2 -index_https = 3 -index_cors = 4 -index_link = 5 -num_segments = 5 - -errors = [] -title_links = [] -anchor_re = re.compile(anchor + '\s(.+)') -section_title_re = re.compile('\*\s\[(.*)\]') -link_re = re.compile('\[(.+)\]\((http.*)\)') - - -def add_error(line_num, message): - """adds an error to the dynamic error list""" - err = '(L{:03d}) {}'.format(line_num + 1, message) - errors.append(err) - - -def check_alphabetical(lines): - """ - checks if all entries per section are in alphabetical order based in entry title - """ - sections = {} - section_line_num = {} - for line_num, line in enumerate(lines): - if line.startswith(anchor): - category = line.split(anchor)[1].strip() - sections[category] = [] - section_line_num[category] = line_num - continue - if not line.startswith('|') or line.startswith('|---'): - continue - raw_title = [x.strip() for x in line.split('|')[1:-1]][0] - title_re_match = link_re.match(raw_title) - if title_re_match: - sections[category].append(title_re_match.group(1).upper()) - - for category, entries in sections.items(): - if sorted(entries) != entries: - add_error(section_line_num[category], "{} section is not in alphabetical order".format(category)) - - -def check_entry(line_num, segments): - # START Title - raw_title = segments[index_title] - title_re_match = link_re.match(raw_title) - # url should be wrapped in '[TITLE](LINK)' Markdown syntax - if not title_re_match: - add_error(line_num, 'Title syntax should be "[TITLE](LINK)"') - else: - # do not allow "... API" in the entry title - title = title_re_match.group(1) - if title.upper().endswith(' API'): - add_error(line_num, 'Title should not end with "... API". Every entry is an API here!') - # END Title - # START Description - # first character should be capitalized - char = segments[index_desc][0] - if char.upper() != char: - add_error(line_num, "first character of description is not capitalized") - # last character should not punctuation - char = segments[index_desc][-1] - if char in punctuation: - add_error(line_num, "description should not end with {}".format(char)) - desc_length = len(segments[index_desc]) - if desc_length > 100: - add_error(line_num, "description should not exceed 100 characters (currently {})".format(desc_length)) - # END Description - # START Auth - # values should conform to valid options only - auth = segments[index_auth] - if auth != 'No' and (not auth.startswith('`') or not auth.endswith('`')): - add_error(line_num, "auth value is not enclosed with `backticks`") - if auth.replace('`', '') not in auth_keys: - add_error(line_num, "{} is not a valid Auth option".format(auth)) - # END Auth - # START HTTPS - # values should conform to valid options only - https = segments[index_https] - if https not in https_keys: - add_error(line_num, "{} is not a valid HTTPS option".format(https)) - # END HTTPS - # START CORS - # values should conform to valid options only - cors = segments[index_cors] - if cors not in cors_keys: - add_error(line_num, "{} is not a valid CORS option".format(cors)) - # END CORS - - -def check_format(filename): - """ - validates that each line is formatted correctly, - appending to error list as needed - """ - with open(filename) as fp: - lines = list(line.rstrip() for line in fp) - check_alphabetical(lines) - # START Check Entries - num_in_category = min_entries_per_section + 1 - category = "" - category_line = 0 - for line_num, line in enumerate(lines): - if section_title_re.match(line): - title_links.append(section_title_re.match(line).group(1)) - # check each section for the minimum number of entries - if line.startswith(anchor): - match = anchor_re.match(line) - if match: - if match.group(1) not in title_links: - add_error(line_num, "section header ({}) not added as a title link".format(match.group(1))) - else: - add_error(line_num, "section header is not formatted correctly") - if num_in_category < min_entries_per_section: - add_error(category_line, "{} section does not have the minimum {} entries (only has {})".format( - category, min_entries_per_section, num_in_category)) - category = line.split(' ')[1] - category_line = line_num - num_in_category = 0 - continue - # skips lines that we do not care about - if not line.startswith('|') or line.startswith('|---'): - continue - num_in_category += 1 - segments = line.split('|')[1:-1] - if len(segments) < num_segments: - add_error(line_num, "entry does not have all the required sections (have {}, need {})".format( - len(segments), num_segments)) - continue - # START Global - for segment in segments: - # every line segment should start and end with exactly 1 space - if len(segment) - len(segment.lstrip()) != 1 or len(segment) - len(segment.rstrip()) != 1: - add_error(line_num, "each segment must start and end with exactly 1 space") - # END Global - segments = [seg.strip() for seg in segments] - check_entry(line_num, segments) - # END Check Entries - - -def main(): - if len(sys.argv) < 2: - print("No file passed (file should contain Markdown table syntax)") - sys.exit(1) - check_format(sys.argv[1]) - if len(errors) > 0: - for err in errors: - print(err) - sys.exit(1) - - -if __name__ == "__main__": - main() diff --git a/build/validate_links.py b/build/validate_links.py deleted file mode 100755 index c0331d4d..00000000 --- a/build/validate_links.py +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/env python3 - -import httplib2 -import re -import socket -import sys - - -def parse_links(filename): - """Returns a list of URLs from text file""" - with open(filename, mode='r', encoding='utf-8') as fp: - readme = fp.read() - index_section = readme.find('## Index') - content = readme[index_section:] - - raw_links = re.findall( - '((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))', - content) - - links = [ - str(raw_link[0]).rstrip('/') for raw_link in raw_links - ] - - return links - -def dup_links(links): - """Check for duplicated links""" - print(f'Checking for duplicated links...') - hasError = False - seen = {} - dupes = [] - - for link in links: - if link not in seen: - seen[link] = 1 - else: - if seen[link] == 1: - dupes.append(link) - - if not dupes: - print(f"No duplicate links") - else: - print(f"Found duplicate links: {dupes}") - hasError = True - return hasError - -def validate_links(links): - """Checks each entry in JSON file for live link""" - print(f'Validating {len(links)} links...') - hasError = False - for link in links: - h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=25) - try: - # fetching host name, removing leading www - host = link.split('//', 1)[1].split('/', 1)[0] - if host[:3] == 'www': - host = host[4:] - - resp = h.request(link + "/", headers={ - # Faking user agent as some hosting services block not-whitelisted UA - 'User-Agent': 'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1467.0 Safari/537.36', - # setting host because Cloudflare returns 403 asking for captcha if host is missing - 'host': host - }) - code = int(resp[0]['status']) - # Checking status code errors - if (code >= 400): - hasError = True - print(f"ERR:CLT:{code} : {link}") - except TimeoutError: - hasError = True - print(f"ERR:TMO: {link}") - except socket.error as socketerror: - hasError = True - print(f"ERR:SOC: {socketerror} : {link}") - except Exception as e: - hasError = True - # Ignore some exceptions which are not actually errors. - # The list below should be extended with other exceptions in the future if needed - if (-1 != str(e).find("[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed (_ssl.c:852)")): - print(f"ERR:SSL: {e} : {link}") - elif (-1 != str(e).find("Content purported to be compressed with gzip but failed to decompress.")): - print(f"ERR:GZP: {e} : {link}") - elif (-1 != str(e).find("Unable to find the server at")): - print(f"ERR:SRV: {e} : {link}") - else: - print(f"ERR:UKN: {e} : {link}") - return hasError - -if __name__ == "__main__": - num_args = len(sys.argv) - if num_args < 2: - print("No .md file passed") - sys.exit(1) - links = parse_links(sys.argv[1]) - hasError = dup_links(links) - if not hasError: - hasError = validate_links(links) - if hasError: - sys.exit(1)