diff --git a/.travis.yml b/.travis.yml index d3244546..2801914d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,10 +1,7 @@ -language: node_js +language: python +python: "3.6" notifications: email: true -before_install: - - rvm install 2.4.0 -install: - - gem install httparty ruby-progressbar before_script: - cd build script: diff --git a/build/build.sh b/build/build.sh index 8143315e..0f23d630 100755 --- a/build/build.sh +++ b/build/build.sh @@ -3,7 +3,6 @@ # create json directory if not already present mkdir -p ../json # parse API README and print (minified) JSON to stdout, redirect to /json -node condenseMd.js ../README.md > single_table.md -node md2json.js single_table.md > ../json/entries.min.json +./md2json.py ../README.md > ../json/entries.min.json # beautify the previously created JSON file, redirect to /json python -m json.tool ../json/entries.min.json > ../json/entries.json diff --git a/build/condenseMd.js b/build/condenseMd.js deleted file mode 100644 index b1eb1735..00000000 --- a/build/condenseMd.js +++ /dev/null @@ -1,63 +0,0 @@ -fs = require('fs') - -function setupMd(filename, anchor) { - fs.readFile(filename, 'utf8', function (err,text) { - if (err) { - return console.log(err); - } - var lines = text.split("\n"), - cur_line = 0, - line = "", - table_name = "", - col_num = 0, - cols = [], - rows = [], - arr = []; - - - function read_line() { - return lines[cur_line++]; - } - - while (true) { - var cols = []; - var rows = []; - while (line.indexOf(anchor) == -1 && cur_line != lines.length) { - line = read_line(); - } - if (cur_line == lines.length) { - break; - } - table_name = line.split(anchor)[1]; - read_line() - read_line() - while (true) { - line = read_line() - if (line.length < 2 || cur_line == lines.length) { - break - } - if (line.indexOf("|") == 0) { - arr.push(line + table_name) - } - } - - } - console.log(anchor + " entries") - console.log("API | Description | Auth | HTTPS | Link | Category") - console.log("|---|---|---|---|---|---|") - for (i = 0; i < arr.length; i++) { - console.log(arr[i]) - } - }); -} - -if (process.argv.length < 3) { - console.log("No .md file passed!"); - return; -} -if (process.argv.length < 4) { - anchorText = "###"; -} else { - anchorText = process.argv[3]; -} -setupMd(process.argv[2].toString(), anchorText); diff --git a/build/main.sh b/build/main.sh index f95a2278..3d5ce9fc 100755 --- a/build/main.sh +++ b/build/main.sh @@ -30,7 +30,7 @@ else fi echo "running format validation..." -./validate_format.rb $FORMAT_FILE +./validate_format.py $FORMAT_FILE if [[ $? != 0 ]]; then echo "format validation failed!" exit 1 diff --git a/build/md2json.js b/build/md2json.js deleted file mode 100644 index dd2112ba..00000000 --- a/build/md2json.js +++ /dev/null @@ -1,134 +0,0 @@ -fs = require('fs') - -function md_trim(str, context) { - str = str.replace(/(^\s+)|(\s+$)/g, ""); - - if (context == 1) { // Name - // placeholder for any formatting on name value - } else if (context == 2) { // Description - str = str.replace(".", ""); // remove ending periods on descriptions - } else if (context == 3) { // Auth - if (str.toUpperCase() == "NO") { - str = null - } else { - str = str.replace("`", "").replace("`", "") - } - } else if (context == 4) { // HTTPS - if (str.toUpperCase() == "YES") { - str = true - } else { - str = false - } - } else if (context == 5) { // Link - str = str.replace("[Go!]", "").slice(1, -1); - } - return str; -} - -function handle(filename, anchor) { - fs.readFile(filename, 'utf8', function (err,text) { - if (err) { - return console.log(err); - } - var lines = text.split("\n"); - var cur_line = 0; - var line = "" - var table_name = ""; - var col_num = 0; - var cols = []; - var rows = []; - var entry_count = 0; - - function read_line() { - return lines[cur_line++]; - } - var root = {}; - while (true) { - var cols = []; - var rows = []; - while (line.indexOf(anchor) == -1 && cur_line != lines.length) { - line = read_line(); - } - if (cur_line == lines.length) { - break; - } - table_name = line.split(anchor)[1]; - table_name = md_trim(table_name, 0) - - line = read_line() - - if (line) { - line = line.split("|") - for (var j in line) { - - line[j] = md_trim(line[j], 0) - if ((j == 0 || j == line.length - 1) && line[j] === "") { - - } else { - cols.push(line[j]); - } - } - if (line.length) { - cols = line; - rows.push(cols) - } else { - console.error("markdown expect column title") - break; - } - } else { - console.error("markdown expect table content") - break; - } - - line = read_line() - - if (!line) { - console.error("markdown expect table spliter") - break; - } - line = read_line() - while (line.indexOf("|") != -1 && cur_line != lines.length) { - - var line_this = line.split("|") - var row = [] - for (var j in line_this) { - line_this[j] = md_trim(line_this[j], j) - if ((j == 0 || j == line_this.length - 1) && line_this[j] === "") { - - } else { - row.push(line_this[j]); - } - - } - rows.push(row); - entry_count++; - line = read_line() - } - - var data=[]; - for (var j in rows) { - if (j != 0) { - var ele = {}; - for (var k in rows[j]) { - ele[rows[0][k]] = rows[j][k]; - } - data.push(ele); - } - } - root["count"] = entry_count; - root[table_name] = data; - } - console.log(JSON.stringify(root)); - }); -} - -if (process.argv.length < 3) { - console.log("No .md file passed!"); - return; -} -if (process.argv.length < 4) { - anchorText = "###"; -} else { - anchorText = process.argv[3]; -} -handle(process.argv[2].toString(), anchorText); diff --git a/build/md2json.py b/build/md2json.py new file mode 100755 index 00000000..86bd5e81 --- /dev/null +++ b/build/md2json.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +import json +import sys + + +def markdown_to_json(filename, anchor): + """Convert a Markdown file into a JSON string""" + category = "" + entries = [] + with open(filename) as fp: + lines = (line.rstrip() for line in fp) + lines = list(line for line in lines if line \ + and line.startswith(anchor) or line.startswith('| ')) + for line in lines: + if line.startswith(anchor): + category = line.split(anchor)[1].strip() + continue + chunks = [x.strip() for x in line.split('|')[1:-1]] + entry = { + 'API': chunks[0], + 'Description': chunks[1], + 'Auth': None if chunks[2].upper() == 'NO' else chunks[2].strip('`'), + 'HTTPS': True if chunks[3].upper() == 'YES' else False, + 'Link': chunks[4].replace('[Go!]', '')[1:-1], + 'Category': category, + } + entries.append(entry) + final = { + 'count': len(entries), + 'entries': entries, + } + return json.dumps(final) + + +def main(): + num_args = len(sys.argv) + if num_args < 2: + print("No .md file passed") + sys.exit(1) + if num_args < 3: + anchor = '###' + else: + anchor = sys.argv[2] + print(markdown_to_json(sys.argv[1], anchor)) + +if __name__ == "__main__": + main() diff --git a/build/validate_format.py b/build/validate_format.py new file mode 100755 index 00000000..7b3df231 --- /dev/null +++ b/build/validate_format.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python3 + +import json +import string +import sys + +anchor = '###' +auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'No'] +punctuation = ['.', '?', '!'] +https_keys = ['Yes', 'No'] + +index_title = 0 +index_desc = 1 +index_auth = 2 +index_https = 3 +index_link = 4 + +errors = [] + + +def add_error(line_num, message): + """adds an error to the dynamic error list""" + err = '(L{:03d}) {}'.format(line_num+1, message) + errors.append(err) + + +def check_format(filename): + """ + validates that each line is formatted correctly, + appending to error list as needed + """ + with open(filename) as fp: + lines = list(line.rstrip() for line in fp) + + # START Alphabetical Order + category = "" + sections = {} + section_line_num = {} + for line_num, line in enumerate(lines): + if line.startswith(anchor): + category = line.split(anchor)[1].strip() + sections[category] = [] + section_line_num[category] = line_num + continue + if not line.startswith('|') or line.startswith('|---'): + continue + title = [x.strip() for x in line.split('|')[1:-1]][0].upper() + sections[category].append(title) + + for category, entries in sections.items(): + if sorted(entries) != entries: + add_error(section_line_num[category], '{} section is not in alphabetical order'.format(category)) + # END Alphabetical Order + + # START Check Entries + for line_num, line in enumerate(lines): + if not line.startswith('|') or line.startswith('|---'): + continue + segments = line.split('|')[1:-1] + # START Global + for segment in segments: + # every line segment should start and end with exactly 1 space + if len(segment) - len(segment.lstrip()) != 1 or len(segment) - len(segment.rstrip()) != 1: + add_error(line_num, "each segment must start and end with exactly 1 space") + # END Global + segments = [seg.strip() for seg in segments] + # START Description + # first character should be capitalized + char = segments[index_desc][0] + if char.upper() != char: + add_error(line_num, "first char of Description is not capitalized") + # last character should not punctuation + char = segments[index_desc][-1] + if char in punctuation: + add_error(line_num, "description should not end with {}".format(char)) + # END Description + # START Auth + # values should conform to valid options only + auth = segments[index_auth].replace('`', '') + if auth not in auth_keys: + add_error(line_num, "{} is not a valid Auth option".format(auth)) + # END Auth + # START HTTPS + # values should conform to valid options only + https = segments[index_https] + if https not in https_keys: + add_error(line_num, "{} is not a valid HTTPS option".format(https)) + # END HTTPS + # START Link + # url should be wrapped in '[Go!]()' Markdown syntax + link = segments[index_link] + if not link.startswith('[Go!](http') or not link.endswith(')'): + add_error(line_num, 'link format should be "[Go!](LINK)"') + # END Link + # END Check Entries + +def main(): + num_args = len(sys.argv) + if num_args < 2: + print("No .md file passed") + sys.exit(1) + + check_format(sys.argv[1]) + if len(errors) > 0: + for err in errors: + print(err) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/build/validate_format.rb b/build/validate_format.rb deleted file mode 100755 index d746224a..00000000 --- a/build/validate_format.rb +++ /dev/null @@ -1,111 +0,0 @@ -#!/usr/bin/env ruby - -auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'No'] -punctuation = ['.', '?', '!'] -https_keys = ['Yes', 'No'] - -INDEX_TITLE = 1 -INDEX_DESCRIPTION = 2 -INDEX_AUTH = 3 -INDEX_HTTPS = 4 -INDEX_LINK = 5 -filename = ARGV[0] -$errors = [] - -def add_error(line_num, val_index, message) - case val_index - when INDEX_TITLE - segment = "Title" - when INDEX_DESCRIPTION - segment = "Description" - when INDEX_AUTH - segment = "Auth" - when INDEX_HTTPS - segment = "HTTPS" - when INDEX_LINK - segment = "Link" - end - $errors.push("(L%03d) %-14.14s #{message}" % [line_num, segment]) -end - -################### CHECK ALPHABETICAL ORDER ################### -section = '' -sections = [] -section_to_line_num = {} -section_to_entries = Hash.new {|h,k| h[k] = Array.new } -File.foreach(filename).with_index do | line, line_num | - if line.start_with?('###') - section = line.sub('###', '').lstrip.chop - sections.push(section) - section_to_line_num[section] = line_num + 1 - end - # Skip non-markdown table lines and table schema lines - if !line.start_with?('|') || line.eql?("|---|---|---|---|---|\n") - next - end - # char to check is the first column - check_char = line.split("|")[1].strip.upcase - section_to_entries[section].push(check_char) -end -sections.each do | sect | - if section_to_entries[sect] != section_to_entries[sect].sort - add_error(section_to_line_num[sect], INDEX_TITLE, "#{sect} section is not in alphabetical order") - end -end - -#################### CHECK LINE ENTRIES ######################## -File.foreach(filename).with_index do | line, line_num | - line_num += 1 - - # Skip non-markdown table lines and table schema lines - if !line.start_with?('|') || line.eql?("|---|---|---|---|---|\n") - next - end - - values = line.split("|") - - ################### GLOBAL ################### - values.each.with_index do |val, val_index| - msg = "" - case val_index - when INDEX_TITLE..INDEX_LINK - # every line segment should start and end with exactly 1 space - if val[/\A */].size != 1 || val[/ *\z/].size != 1 - add_error(line_num, val_index, "string should start and end with exactly 1 space") - end - end - end - ################# DESCRIPTION ################ - # First character should be capitalized - desc_val = values[INDEX_DESCRIPTION].lstrip.chop - if !/[[:upper:]]/.match(desc_val[0]) - add_error(line_num, INDEX_DESCRIPTION, "first char not uppercase") - end - # value should not be punctuated - last_char = desc_val[desc_val.length-1] - if punctuation.include?(last_char) - add_error(line_num, INDEX_DESCRIPTION, "description should not end with \"#{last_char}\"") - end - #################### AUTH #################### - # Values should conform to valid options only - auth_val = values[INDEX_AUTH].lstrip.chop.tr('``', '') - if !auth_keys.include?(auth_val) - add_error(line_num, INDEX_AUTH, "not a valid option: #{auth_val}") - end - #################### HTTPS ################### - # Values should be either "Yes" or "No" - https_val = values[INDEX_HTTPS].lstrip.chop - if !https_keys.include?(https_val) - add_error(line_num, INDEX_HTTPS, "must use \"Yes\" or \"No\": #{https_val}") - end - #################### LINK #################### - # Url should be wrapped in "[Go!]" view - link_val = values[INDEX_LINK].lstrip.chop - if !link_val.start_with?("[Go!](") || !link_val.end_with?(')') - add_error(line_num, INDEX_LINK, "format should be \"[Go!]()\": #{link_val}") - end -end -$errors.each do | e | - puts e -end -exit($errors.length) diff --git a/build/validate_links.py b/build/validate_links.py new file mode 100755 index 00000000..49bb4f88 --- /dev/null +++ b/build/validate_links.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + +import httplib2 +import json +import socket +import sys + + +def parse_links(filename): + """Returns a list of links from JSON object""" + data = json.load(open(filename)) + links = [] + for entry in data['entries']: + link = entry['Link'] + https = True if link.startswith('https') else False + x = { + 'link': link, + 'https': https, + } + links.append(x) + return links + + +def validate_links(links): + """Checks each entry in JSON file for live link""" + print('Validating {} links...'.format(len(links))) + errors = [] + for each in links: + link = each['link'] + h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5) + try: + resp = h.request(link, 'HEAD') + code = int(resp[0]['status']) + # check if status code is a client or server error + if code >= 404: + errors.append('{}: {}'.format(code, link)) + except TimeoutError: + errors.append("TMO: " + link) + except socket.error as socketerror: + errors.append("SOC: {} : {}".format(socketerror, link)) + return errors + +if __name__ == "__main__": + num_args = len(sys.argv) + if num_args < 2: + print("No .json file passed") + sys.exit(1) + errors = validate_links(parse_links(sys.argv[1])) + if len(errors) > 0: + for err in errors: + print(err) + sys.exit(1) + diff --git a/build/validate_links.rb b/build/validate_links.rb deleted file mode 100755 index d236e7c5..00000000 --- a/build/validate_links.rb +++ /dev/null @@ -1,81 +0,0 @@ -#!/usr/bin/env ruby -require 'httparty' -require 'ruby-progressbar' -require 'uri' -allowed_codes = [200, 302, 403, 429] -allowed_links = ["https://www.yelp.com/developers/documentation/v3"] -args = ARGV -filename = args[0] -contents = File.open(filename, 'rb') { |f| f.read } -raw_links = URI.extract(contents, ['http', 'https']) -# Remove trailing ')' from entry URLs -links = [] -raw_links.each do |link| - if link.end_with?(')') - links.push(link[0...-1]) - else - links.push(link) - end -end -if links.length <= 0 - puts "no links to check" - exit(0) -end -fails = [] -# Fail on any duplicate elements -dup = links.select{|element| links.count(element) > 1} -if dup.uniq.length > 0 - dup.uniq.each do |e| - fails.push("(DUP): #{e}") - end -end -# Remove any duplicates from array -links = links.uniq -count = 0 -total = links.length -progressbar = ProgressBar.create(:total => total, - :format => "%a %P% | Processed: %c from %C") -# GET each link and check for valid response code from allowed_codes -links.each do |link| - begin - count += 1 - if allowed_links.include?(link) - next - end - res = HTTParty.get(link, timeout: 10) - if res.code.nil? - fails.push("(NIL): #{link}") - next - end - if !allowed_codes.include?(res.code) - fails.push("(#{res.code}): #{link}") - end - rescue HTTParty::RedirectionTooDeep - fails.push("(RTD): #{link}") - rescue Net::ReadTimeout - fails.push("(TMO): #{link}") - rescue Net::OpenTimeout - fails.push("(TMO): #{link}") - rescue OpenSSL::SSL::SSLError - fails.push("(SSL): #{link}") - rescue SocketError - fails.push("(SOK): #{link}") - rescue Errno::ECONNREFUSED - fails.push("(CON): #{link}") - rescue Errno::ECONNRESET - next - end - progressbar.increment -end -puts "#{count}/#{total} links checked" -if fails.length <= 0 - puts "all links valid" - exit(0) -else - puts "-- RESULTS --" - fails.sort! - fails.each do |e| - puts e - end - exit(1) -end diff --git a/json/entries.json b/json/entries.json index e63696ad..a4aa5a52 100644 --- a/json/entries.json +++ b/json/entries.json @@ -197,7 +197,7 @@ "API": "BookNomads", "Auth": null, "Category": "Books", - "Description": "Books published in the Netherlands and Flanders (about 25 million), book covers, and related data", + "Description": "Books published in the Netherlands and Flanders (about 2.5 million), book covers, and related data", "HTTPS": true, "Link": "https://www.booknomads.com/dev" }, @@ -765,7 +765,7 @@ "API": "JSONbin.io", "Auth": "apiKey", "Category": "Development", - "Description": "Free JSON storage service Ideal for small scale Web apps, Websites and Mobile apps", + "Description": "Free JSON storage service. Ideal for small scale Web apps, Websites and Mobile apps", "HTTPS": true, "Link": "https://jsonbin.io" }, @@ -1741,7 +1741,7 @@ "API": "BusinessUSA", "Auth": "apiKey", "Category": "Government", - "Description": "BusinessUSA gives developers access to authoritative information on US. programs, events, services and more", + "Description": "BusinessUSA gives developers access to authoritative information on U.S. programs, events, services and more", "HTTPS": true, "Link": "https://business.usa.gov/developer" }, @@ -1781,7 +1781,7 @@ "API": "Regulations.gov", "Auth": "apiKey", "Category": "Government", - "Description": "Regulationsgov provides access to Federal regulatory materials and increases public participation and their understanding of the Federal rule making process", + "Description": "Regulations.gov provides access to Federal regulatory materials and increases public participation and their understanding of the Federal rule making process", "HTTPS": true, "Link": "https://regulationsgov.github.io/developers/" }, @@ -1837,7 +1837,7 @@ "API": "Medicare", "Auth": null, "Category": "Health", - "Description": "Access to the data from the CMS - medicaregov", + "Description": "Access to the data from the CMS - medicare.gov", "HTTPS": true, "Link": "https://data.medicare.gov/developers" }, @@ -2477,7 +2477,7 @@ "API": "UPC database", "Auth": "apiKey", "Category": "Open Data", - "Description": "More than 15 million barcode numbers from all around the world", + "Description": "More than 1.5 million barcode numbers from all around the world", "HTTPS": true, "Link": "https://upcdatabase.org/api" }, @@ -2517,7 +2517,7 @@ "API": "Drupal.org", "Auth": null, "Category": "Open Source Projects", - "Description": "Drupalorg", + "Description": "Drupal.org", "HTTPS": true, "Link": "https://www.drupal.org/drupalorg/docs/api" }, @@ -2733,7 +2733,7 @@ "API": "inspirehep.net", "Auth": null, "Category": "Science", - "Description": "High Energy Physics info system", + "Description": "High Energy Physics info. system", "HTTPS": true, "Link": "https://inspirehep.net/info/hep/api?ln=en" }, @@ -2749,7 +2749,7 @@ "API": "Minor Planet Center", "Auth": null, "Category": "Science", - "Description": "Asterankcom Information", + "Description": "Asterank.com Information", "HTTPS": false, "Link": "http://www.asterank.com/mpc" }, @@ -2949,7 +2949,7 @@ "API": "Meetup.com", "Auth": "apiKey", "Category": "Social", - "Description": "Data about Meetups from Meetupcom", + "Description": "Data about Meetups from Meetup.com", "HTTPS": true, "Link": "https://www.meetup.com/meetup_api/" },