Update md2json, validate_format, and validate_links (pull/571/head)
@@ -1,10 +1,7 @@ | |||
language: node_js | |||
language: python | |||
python: "3.6" | |||
notifications: | |||
email: true | |||
before_install: | |||
- rvm install 2.4.0 | |||
install: | |||
- gem install httparty ruby-progressbar | |||
before_script: | |||
- cd build | |||
script: | |||
@@ -3,7 +3,6 @@ | |||
# create json directory if not already present | |||
mkdir -p ../json | |||
# parse API README and print (minified) JSON to stdout, redirect to /json | |||
node condenseMd.js ../README.md > single_table.md | |||
node md2json.js single_table.md > ../json/entries.min.json | |||
./md2json.py ../README.md > ../json/entries.min.json | |||
# beautify the previously created JSON file, redirect to /json | |||
python -m json.tool ../json/entries.min.json > ../json/entries.json |
@@ -1,63 +0,0 @@ | |||
fs = require('fs') | |||
/**
 * Flatten every per-category Markdown table in `filename` into one table
 * printed on stdout, with the category name appended as an extra column.
 *
 * A category starts at a line containing `anchor`; the two lines after it
 * (column titles and separator) are skipped, and every following line that
 * starts with "|" is emitted until a line shorter than two characters.
 * As in the original implementation, the very last line of the file is never
 * emitted as a row.
 *
 * @param {string} filename - path of the Markdown file to read
 * @param {string} anchor - heading marker that introduces a category
 */
function setupMd(filename, anchor) {
    fs.readFile(filename, 'utf8', function (err, text) {
        if (err) {
            // stdout carries the generated table, so diagnostics go to stderr
            return console.error(err);
        }
        var lines = text.split("\n");
        var cur_line = 0;
        var line = "";
        var table_name = "";
        var arr = [];

        // Returns the next line, or undefined once the input is exhausted.
        function read_line() {
            return lines[cur_line++];
        }

        while (true) {
            // advance to the next heading line (or the end of the input)
            while (line.indexOf(anchor) == -1 && cur_line < lines.length) {
                line = read_line();
            }
            if (cur_line >= lines.length) {
                break;
            }
            table_name = line.split(anchor)[1];
            // skip the column-title row and the |---| separator row
            read_line();
            read_line();
            while (true) {
                line = read_line();
                if (line === undefined) {
                    // heading was too close to the end of the file: stop
                    // cleanly instead of dereferencing undefined
                    line = "";
                    break;
                }
                if (line.length < 2 || cur_line == lines.length) {
                    break;
                }
                if (line.indexOf("|") == 0) {
                    arr.push(line + table_name);
                }
            }
        }

        console.log(anchor + " entries");
        console.log("API | Description | Auth | HTTPS | Link | Category");
        console.log("|---|---|---|---|---|---|");
        for (var i = 0; i < arr.length; i++) {
            console.log(arr[i]);
        }
    });
}
// Command-line entry point: node condenseMd.js <file.md> [anchor]
// Written as if/else because a top-level `return` is only legal inside a
// CommonJS module wrapper.
if (process.argv.length < 3) {
    console.log("No .md file passed!");
} else {
    // the heading marker defaults to "###" when no second argument is given
    var anchorText = process.argv.length < 4 ? "###" : process.argv[3];
    setupMd(process.argv[2].toString(), anchorText);
}
@@ -30,7 +30,7 @@ else | |||
fi | |||
echo "running format validation..." | |||
./validate_format.rb $FORMAT_FILE | |||
./validate_format.py $FORMAT_FILE | |||
if [[ $? != 0 ]]; then | |||
echo "format validation failed!" | |||
exit 1 | |||
@@ -1,134 +0,0 @@ | |||
fs = require('fs') | |||
/**
 * Trim a Markdown table cell and normalise it according to its column.
 *
 * @param {string} str - raw cell text
 * @param {number|string} context - column index; numeric strings are accepted
 *   because callers pass `for...in` keys.
 *   2 = Description, 3 = Auth, 4 = HTTPS, 5 = Link; any other value only trims.
 * @returns {string|boolean|null} trimmed text; for Auth, null when the value
 *   is "No"; for HTTPS, true only when the value is "Yes".
 */
function md_trim(str, context) {
    var column = Number(context);
    var text = str.replace(/(^\s+)|(\s+$)/g, "");

    if (column === 2) { // Description
        // Strip only a trailing period. A plain-string pattern would remove
        // the first period anywhere, mangling "2.5 million" or "Drupal.org".
        return text.replace(/\.$/, "");
    }
    if (column === 3) { // Auth
        if (text.toUpperCase() === "NO") {
            return null;
        }
        // drop the Markdown code-span backticks around the auth scheme
        return text.replace(/`/g, "");
    }
    if (column === 4) { // HTTPS
        return text.toUpperCase() === "YES";
    }
    if (column === 5) { // Link
        // "[Go!](url)" -> "(url)" -> "url"
        return text.replace("[Go!]", "").slice(1, -1);
    }
    return text;
}
/**
 * Convert the Markdown tables in `filename` to JSON and print it on stdout.
 *
 * Each line containing `anchor` starts a table: the next line holds the
 * column titles, the line after it is the separator, and every following line
 * containing "|" is a data row. The printed object maps each table name to an
 * array of row objects keyed by column title, plus a "count" key holding the
 * total number of rows across all tables.
 *
 * @param {string} filename - path of the Markdown file to read
 * @param {string} anchor - heading marker that introduces a table
 */
function handle(filename, anchor) {
    fs.readFile(filename, 'utf8', function (err, text) {
        if (err) {
            // stdout carries the generated JSON, so diagnostics go to stderr
            return console.error(err);
        }
        var lines = text.split("\n");
        var cur_line = 0;
        var line = "";
        var entry_count = 0;
        var root = {};

        // Returns the next line, or undefined once the input is exhausted.
        function read_line() {
            return lines[cur_line++];
        }

        // Split a table line on "|", normalise every cell through md_trim and
        // drop the empty cells produced by a leading or trailing pipe.
        function parse_cells(raw, is_header) {
            var parts = raw.split("|");
            var cells = [];
            for (var j = 0; j < parts.length; j++) {
                // header cells are only trimmed; data cells get per-column rules
                var value = md_trim(parts[j], is_header ? 0 : j);
                if ((j === 0 || j === parts.length - 1) && value === "") {
                    continue;
                }
                cells.push(value);
            }
            return cells;
        }

        while (true) {
            // advance to the next heading line (or the end of the input)
            while (line.indexOf(anchor) == -1 && cur_line < lines.length) {
                line = read_line();
            }
            if (cur_line >= lines.length) {
                break;
            }
            var table_name = md_trim(line.split(anchor)[1], 0);

            line = read_line();
            if (!line) {
                console.error("markdown expect table content");
                break;
            }
            // Use the filtered cells as column titles so a header written
            // with outer pipes lines up with the filtered data rows.
            var header = parse_cells(line, true);
            if (!header.length) {
                console.error("markdown expect column title");
                break;
            }

            line = read_line();
            if (!line) {
                console.error("markdown expect table spliter");
                break;
            }

            var data = [];
            line = read_line();
            // as in the original, the final line of the file is never a row
            while (line !== undefined && line.indexOf("|") != -1 && cur_line < lines.length) {
                var row = parse_cells(line, false);
                var ele = {};
                for (var k = 0; k < row.length; k++) {
                    ele[header[k]] = row[k];
                }
                data.push(ele);
                entry_count++;
                line = read_line();
            }
            if (line === undefined) {
                // table ran to the end of the file; keep `line` a string so
                // the heading scan above can terminate normally
                line = "";
            }

            root["count"] = entry_count;
            root[table_name] = data;
        }
        console.log(JSON.stringify(root));
    });
}
// Command-line entry point: node md2json.js <file.md> [anchor]
// Written as if/else because a top-level `return` is only legal inside a
// CommonJS module wrapper.
if (process.argv.length < 3) {
    console.log("No .md file passed!");
} else {
    // the heading marker defaults to "###" when no second argument is given
    var anchorText = process.argv.length < 4 ? "###" : process.argv[3];
    handle(process.argv[2].toString(), anchorText);
}
@@ -0,0 +1,48 @@ | |||
#!/usr/bin/env python3 | |||
import json | |||
import sys | |||
def markdown_to_json(filename, anchor):
    """Convert a Markdown file into a JSON string.

    Lines starting with `anchor` set the current category; lines starting
    with '| ' are table rows whose first five cells become one entry.

    :param filename: path of the Markdown file to read (decoded as UTF-8)
    :param anchor: heading prefix that introduces a category, e.g. '###'
    :returns: JSON text of the form {"count": N, "entries": [...]}
    :raises IndexError: if a table row has fewer than five cells
    """
    category = ""
    entries = []
    # explicit encoding so the result does not depend on the build locale
    with open(filename, encoding='utf-8') as fp:
        for raw_line in fp:
            line = raw_line.rstrip()
            if line.startswith(anchor):
                category = line.split(anchor)[1].strip()
                continue
            # column-title and |---| separator rows do not start with '| '
            if not line.startswith('| '):
                continue
            # drop the empty strings produced by the outer pipes
            chunks = [x.strip() for x in line.split('|')[1:-1]]
            entries.append({
                'API': chunks[0],
                'Description': chunks[1],
                'Auth': None if chunks[2].upper() == 'NO' else chunks[2].strip('`'),
                'HTTPS': chunks[3].upper() == 'YES',
                # "[Go!](url)" -> "(url)" -> "url"
                'Link': chunks[4].replace('[Go!]', '')[1:-1],
                'Category': category,
            })
    final = {
        'count': len(entries),
        'entries': entries,
    }
    return json.dumps(final)
def main():
    """Command-line entry point: md2json.py <file.md> [anchor].

    Prints the JSON produced by markdown_to_json on stdout. The anchor
    defaults to '###' when no second argument is given. Exits with status 1
    when no Markdown file is passed.
    """
    if len(sys.argv) < 2:
        # stdout is the JSON output stream, so the usage error goes to
        # stderr instead of ending up inside the generated file
        print("No .md file passed", file=sys.stderr)
        sys.exit(1)
    anchor = sys.argv[2] if len(sys.argv) > 2 else '###'
    print(markdown_to_json(sys.argv[1], anchor))


if __name__ == "__main__":
    main()
@@ -0,0 +1,111 @@ | |||
#!/usr/bin/env python3 | |||
import json | |||
import string | |||
import sys | |||
# heading prefix that marks the start of a category section
anchor = '###'
# accepted values for the Auth column (compared after backticks are removed)
auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'No']
# characters a Description must not end with
punctuation = ['.', '?', '!']
# accepted values for the HTTPS column
https_keys = ['Yes', 'No']
# zero-based positions of the columns once the outer pipes are dropped
index_title = 0
index_desc = 1
index_auth = 2
index_https = 3
index_link = 4
# messages collected by add_error() during validation
errors = []
def add_error(line_num, message):
    """Append `message` to the module-level error list.

    `line_num` is zero-based; the stored text carries the one-based line
    number, zero-padded to three digits, e.g. "(L007) ...".
    """
    errors.append(f'(L{line_num + 1:03d}) {message}')
def check_format(filename):
    """
    validates that each line is formatted correctly,
    appending to error list as needed

    Two passes are made over the file (decoded as UTF-8):
    1. entries inside each category section must be in alphabetical order
       of their upper-cased title;
    2. every table row must satisfy the spacing, Description, Auth, HTTPS
       and Link rules.
    Problems are recorded through add_error(); nothing is returned.
    """
    with open(filename, encoding='utf-8') as fp:
        lines = [line.rstrip() for line in fp]

    def is_entry(line):
        """table rows start with '|'; the |---| separator row is excluded"""
        return line.startswith('|') and not line.startswith('|---')

    # START Alphabetical Order
    category = ""
    sections = {}
    section_line_num = {}
    for line_num, line in enumerate(lines):
        if line.startswith(anchor):
            category = line.split(anchor)[1].strip()
            sections[category] = []
            section_line_num[category] = line_num
            continue
        if not is_entry(line):
            continue
        cells = [x.strip() for x in line.split('|')[1:-1]]
        if not cells:
            # row without any complete cell; reported by the entry checks below
            continue
        # setdefault keeps a row that precedes every heading from raising KeyError
        section_line_num.setdefault(category, line_num)
        sections.setdefault(category, []).append(cells[0].upper())

    for category, entries in sections.items():
        if sorted(entries) != entries:
            add_error(section_line_num[category], '{} section is not in alphabetical order'.format(category))
    # END Alphabetical Order

    # START Check Entries
    for line_num, line in enumerate(lines):
        if not is_entry(line):
            continue

        segments = line.split('|')[1:-1]

        # START Global
        for segment in segments:
            # every line segment should start and end with exactly 1 space
            if len(segment) - len(segment.lstrip()) != 1 or len(segment) - len(segment.rstrip()) != 1:
                add_error(line_num, "each segment must start and end with exactly 1 space")
        # END Global

        segments = [seg.strip() for seg in segments]
        if len(segments) <= index_link:
            # report short rows instead of failing with IndexError below
            add_error(line_num, "expected at least {} columns but found {}".format(index_link + 1, len(segments)))
            continue

        # START Description
        description = segments[index_desc]
        if not description:
            add_error(line_num, "Description is empty")
        else:
            # first character should be capitalized
            char = description[0]
            if char.upper() != char:
                add_error(line_num, "first char of Description is not capitalized")
            # last character should not be punctuation
            char = description[-1]
            if char in punctuation:
                add_error(line_num, "description should not end with {}".format(char))
        # END Description

        # START Auth
        # values should conform to valid options only
        auth = segments[index_auth].replace('`', '')
        if auth not in auth_keys:
            add_error(line_num, "{} is not a valid Auth option".format(auth))
        # END Auth

        # START HTTPS
        # values should conform to valid options only
        https = segments[index_https]
        if https not in https_keys:
            add_error(line_num, "{} is not a valid HTTPS option".format(https))
        # END HTTPS

        # START Link
        # url should be wrapped in '[Go!]()' Markdown syntax
        link = segments[index_link]
        if not link.startswith('[Go!](http') or not link.endswith(')'):
            add_error(line_num, 'link format should be "[Go!](LINK)"')
        # END Link
    # END Check Entries
def main():
    """Command-line entry point: validate_format.py <file.md>.

    Runs check_format on the given file, prints every collected error on its
    own line, and exits with status 1 when the file argument is missing or
    at least one error was recorded.
    """
    if len(sys.argv) < 2:
        print("No .md file passed")
        sys.exit(1)

    check_format(sys.argv[1])
    if errors:
        print('\n'.join(errors))
        sys.exit(1)


if __name__ == "__main__":
    main()
@@ -1,111 +0,0 @@ | |||
#!/usr/bin/env ruby | |||
# Accepted values for the Auth column once backticks are removed.
auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'No']
# Characters a Description is not allowed to end with.
punctuation = ['.', '?', '!']
# Accepted values for the HTTPS column.
https_keys = ['Yes', 'No']
# Positions of the columns in line.split("|"); index 0 is the text before
# the leading pipe, so the first real column is 1.
INDEX_TITLE = 1
INDEX_DESCRIPTION = 2
INDEX_AUTH = 3
INDEX_HTTPS = 4
INDEX_LINK = 5
# Markdown file to validate, taken from the first command-line argument.
filename = ARGV[0]
# Error messages accumulated by add_error.
$errors = []
# Records a validation error in $errors.
#
# line_num  - line number shown in the "(Lnnn)" prefix, zero-padded to 3 digits
# val_index - one of the INDEX_* constants; selects the column label, which is
#             padded/truncated to 14 characters (blank for any other value)
# message   - text appended after the column label
def add_error(line_num, val_index, message)
  labels = {
    INDEX_TITLE       => "Title",
    INDEX_DESCRIPTION => "Description",
    INDEX_AUTH        => "Auth",
    INDEX_HTTPS       => "HTTPS",
    INDEX_LINK        => "Link"
  }
  segment = labels[val_index]
  $errors.push("(L%03d) %-14.14s #{message}" % [line_num, segment])
end
################### CHECK ALPHABETICAL ORDER ###################
# First pass: collect the upper-cased title of every table row under the
# section heading it belongs to, then flag each section whose titles are not
# already sorted.
section = ''
sections = []
section_to_line_num = {}
# unknown sections start with an empty entry list
section_to_entries = Hash.new {|h,k| h[k] = Array.new }
File.foreach(filename).with_index do | line, line_num |
  if line.start_with?('###')
    # chop removes the final character of the heading line
    # (presumably the trailing newline -- confirm for a file without one)
    section = line.sub('###', '').lstrip.chop
    sections.push(section)
    # with_index is zero-based; store the one-based line number for reporting
    section_to_line_num[section] = line_num + 1
  end
  # Skip non-markdown table lines and table schema lines
  if !line.start_with?('|') || line.eql?("|---|---|---|---|---|\n")
    next
  end
  # value to compare is the whole first column, upper-cased
  check_char = line.split("|")[1].strip.upcase
  section_to_entries[section].push(check_char)
end
sections.each do | sect |
  if section_to_entries[sect] != section_to_entries[sect].sort
    add_error(section_to_line_num[sect], INDEX_TITLE, "#{sect} section is not in alphabetical order")
  end
end
#################### CHECK LINE ENTRIES ########################
# Second pass: validate spacing, Description, Auth, HTTPS and Link on every
# table row, recording each problem through add_error.
File.foreach(filename).with_index do | line, line_num |
  # with_index is zero-based; errors are reported with one-based line numbers
  line_num += 1
  # Skip non-markdown table lines and table schema lines
  if !line.start_with?('|') || line.eql?("|---|---|---|---|---|\n")
    next
  end
  # values[0] is the text before the leading pipe, so columns start at 1
  values = line.split("|")
  ################### GLOBAL ###################
  values.each.with_index do |val, val_index|
    msg = ""
    case val_index
    when INDEX_TITLE..INDEX_LINK
      # every line segment should start and end with exactly 1 space
      if val[/\A */].size != 1 || val[/ *\z/].size != 1
        add_error(line_num, val_index, "string should start and end with exactly 1 space")
      end
    end
  end
  ################# DESCRIPTION ################
  # First character should be capitalized
  # lstrip.chop drops leading whitespace and the final character of the
  # segment (expected to be its single trailing space)
  desc_val = values[INDEX_DESCRIPTION].lstrip.chop
  if !/[[:upper:]]/.match(desc_val[0])
    add_error(line_num, INDEX_DESCRIPTION, "first char not uppercase")
  end
  # value should not be punctuated
  last_char = desc_val[desc_val.length-1]
  if punctuation.include?(last_char)
    add_error(line_num, INDEX_DESCRIPTION, "description should not end with \"#{last_char}\"")
  end
  #################### AUTH ####################
  # Values should conform to valid options only
  # tr('``', '') deletes every backtick from the value
  auth_val = values[INDEX_AUTH].lstrip.chop.tr('``', '')
  if !auth_keys.include?(auth_val)
    add_error(line_num, INDEX_AUTH, "not a valid option: #{auth_val}")
  end
  #################### HTTPS ###################
  # Values should be either "Yes" or "No"
  https_val = values[INDEX_HTTPS].lstrip.chop
  if !https_keys.include?(https_val)
    add_error(line_num, INDEX_HTTPS, "must use \"Yes\" or \"No\": #{https_val}")
  end
  #################### LINK ####################
  # Url should be wrapped in "[Go!]" view
  link_val = values[INDEX_LINK].lstrip.chop
  if !link_val.start_with?("[Go!](") || !link_val.end_with?(')')
    add_error(line_num, INDEX_LINK, "format should be \"[Go!](<LINK>)\": #{link_val}")
  end
end
# Print every collected error, one per line.
$errors.each do | e |
  puts e
end
# Exit 1 on any error rather than with the error count: exit statuses are
# truncated to 8 bits, so a count that is a multiple of 256 would be reported
# as success.
exit($errors.empty? ? 0 : 1)
@@ -0,0 +1,53 @@ | |||
#!/usr/bin/env python3 | |||
import httplib2 | |||
import json | |||
import socket | |||
import sys | |||
def parse_links(filename):
    """Returns a list of links from JSON object

    :param filename: path of a JSON file holding an 'entries' list whose
        items each carry a 'Link' key
    :returns: list of {'link': <url>, 'https': <bool>} dicts in file order;
        'https' is True when the url starts with 'https'
    """
    # context manager so the file handle is closed once parsing is done
    with open(filename, encoding='utf-8') as fp:
        data = json.load(fp)
    links = []
    for entry in data['entries']:
        link = entry['Link']
        links.append({
            'link': link,
            'https': link.startswith('https'),
        })
    return links
def validate_links(links):
    """Checks each entry in JSON file for live link

    Sends a HEAD request to every link (5 second timeout, certificate
    validation disabled) and collects a message for each failure.

    :param links: list of dicts with a 'link' key, as built by parse_links
    :returns: list of error strings; empty when every link passed
    """
    print('Validating {} links...'.format(len(links)))
    errors = []
    for each in links:
        link = each['link']
        h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
        try:
            resp = h.request(link, 'HEAD')
            code = int(resp[0]['status'])
            # NOTE(review): statuses 400-403 are let through by this
            # threshold -- confirm that is intended
            if code >= 404:
                errors.append('{}: {}'.format(code, link))
        except (TimeoutError, socket.timeout):
            # socket.timeout only became an alias of TimeoutError in Python
            # 3.10; before that it would fall into the socket.error branch
            errors.append("TMO: " + link)
        except socket.error as socketerror:
            errors.append("SOC: {} : {}".format(socketerror, link))
        except httplib2.HttpLib2Error as httperror:
            # httplib2's own failures are not OSError subclasses; record
            # them instead of aborting the whole run
            errors.append("ERR: {} : {}".format(httperror, link))
    return errors
# Script entry point: validate_links.py <entries.json>
# Exits with status 1 when the file argument is missing or any link failed.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("No .json file passed")
        sys.exit(1)

    errors = validate_links(parse_links(sys.argv[1]))
    if errors:
        print('\n'.join(errors))
        sys.exit(1)
@@ -1,81 +0,0 @@ | |||
#!/usr/bin/env ruby | |||
require 'httparty' | |||
require 'ruby-progressbar' | |||
require 'uri' | |||
# Response codes that count as a live link.
allowed_codes = [200, 302, 403, 429]
# Links that are never requested.
allowed_links = ["https://www.yelp.com/developers/documentation/v3"]

args = ARGV
filename = args[0]
contents = File.open(filename, 'rb') { |f| f.read }
raw_links = URI.extract(contents, ['http', 'https'])

# Remove trailing ')' from entry URLs
links = raw_links.map do |link|
  link.end_with?(')') ? link[0...-1] : link
end

if links.empty?
  puts "no links to check"
  exit(0)
end

fails = []

# Fail on any duplicate elements; one counting pass instead of calling
# links.count for every element
occurrences = Hash.new(0)
links.each { |link| occurrences[link] += 1 }
occurrences.each do |link, seen|
  fails.push("(DUP): #{link}") if seen > 1
end

# Remove any duplicates from array
links = links.uniq
count = 0
total = links.length
progressbar = ProgressBar.create(:total => total,
                                 :format => "%a %P% | Processed: %c from %C")

# GET each link and check for valid response code from allowed_codes
links.each do |link|
  begin
    count += 1
    next if allowed_links.include?(link)

    res = HTTParty.get(link, timeout: 10)
    if res.code.nil?
      fails.push("(NIL): #{link}")
    elsif !allowed_codes.include?(res.code)
      fails.push("(#{res.code}): #{link}")
    end
  rescue HTTParty::RedirectionTooDeep
    fails.push("(RTD): #{link}")
  rescue Net::ReadTimeout
    fails.push("(TMO): #{link}")
  rescue Net::OpenTimeout
    fails.push("(TMO): #{link}")
  rescue OpenSSL::SSL::SSLError
    fails.push("(SSL): #{link}")
  rescue SocketError
    fails.push("(SOK): #{link}")
  rescue Errno::ECONNREFUSED
    fails.push("(CON): #{link}")
  rescue Errno::ECONNRESET
    # connection resets are not counted as failures
  ensure
    # advance the bar for every processed link, including skipped and
    # failed ones, so it reaches its total
    progressbar.increment
  end
end

puts "#{count}/#{total} links checked"
if fails.empty?
  puts "all links valid"
  exit(0)
else
  puts "-- RESULTS --"
  fails.sort!
  fails.each do |e|
    puts e
  end
  exit(1)
end
@@ -197,7 +197,7 @@ | |||
"API": "BookNomads", | |||
"Auth": null, | |||
"Category": "Books", | |||
"Description": "Books published in the Netherlands and Flanders (about 25 million), book covers, and related data", | |||
"Description": "Books published in the Netherlands and Flanders (about 2.5 million), book covers, and related data", | |||
"HTTPS": true, | |||
"Link": "https://www.booknomads.com/dev" | |||
}, | |||
@@ -765,7 +765,7 @@ | |||
"API": "JSONbin.io", | |||
"Auth": "apiKey", | |||
"Category": "Development", | |||
"Description": "Free JSON storage service Ideal for small scale Web apps, Websites and Mobile apps", | |||
"Description": "Free JSON storage service. Ideal for small scale Web apps, Websites and Mobile apps", | |||
"HTTPS": true, | |||
"Link": "https://jsonbin.io" | |||
}, | |||
@@ -1741,7 +1741,7 @@ | |||
"API": "BusinessUSA", | |||
"Auth": "apiKey", | |||
"Category": "Government", | |||
"Description": "BusinessUSA gives developers access to authoritative information on US. programs, events, services and more", | |||
"Description": "BusinessUSA gives developers access to authoritative information on U.S. programs, events, services and more", | |||
"HTTPS": true, | |||
"Link": "https://business.usa.gov/developer" | |||
}, | |||
@@ -1781,7 +1781,7 @@ | |||
"API": "Regulations.gov", | |||
"Auth": "apiKey", | |||
"Category": "Government", | |||
"Description": "Regulationsgov provides access to Federal regulatory materials and increases public participation and their understanding of the Federal rule making process", | |||
"Description": "Regulations.gov provides access to Federal regulatory materials and increases public participation and their understanding of the Federal rule making process", | |||
"HTTPS": true, | |||
"Link": "https://regulationsgov.github.io/developers/" | |||
}, | |||
@@ -1837,7 +1837,7 @@ | |||
"API": "Medicare", | |||
"Auth": null, | |||
"Category": "Health", | |||
"Description": "Access to the data from the CMS - medicaregov", | |||
"Description": "Access to the data from the CMS - medicare.gov", | |||
"HTTPS": true, | |||
"Link": "https://data.medicare.gov/developers" | |||
}, | |||
@@ -2477,7 +2477,7 @@ | |||
"API": "UPC database", | |||
"Auth": "apiKey", | |||
"Category": "Open Data", | |||
"Description": "More than 15 million barcode numbers from all around the world", | |||
"Description": "More than 1.5 million barcode numbers from all around the world", | |||
"HTTPS": true, | |||
"Link": "https://upcdatabase.org/api" | |||
}, | |||
@@ -2517,7 +2517,7 @@ | |||
"API": "Drupal.org", | |||
"Auth": null, | |||
"Category": "Open Source Projects", | |||
"Description": "Drupalorg", | |||
"Description": "Drupal.org", | |||
"HTTPS": true, | |||
"Link": "https://www.drupal.org/drupalorg/docs/api" | |||
}, | |||
@@ -2733,7 +2733,7 @@ | |||
"API": "inspirehep.net", | |||
"Auth": null, | |||
"Category": "Science", | |||
"Description": "High Energy Physics info system", | |||
"Description": "High Energy Physics info. system", | |||
"HTTPS": true, | |||
"Link": "https://inspirehep.net/info/hep/api?ln=en" | |||
}, | |||
@@ -2749,7 +2749,7 @@ | |||
"API": "Minor Planet Center", | |||
"Auth": null, | |||
"Category": "Science", | |||
"Description": "Asterankcom Information", | |||
"Description": "Asterank.com Information", | |||
"HTTPS": false, | |||
"Link": "http://www.asterank.com/mpc" | |||
}, | |||
@@ -2949,7 +2949,7 @@ | |||
"API": "Meetup.com", | |||
"Auth": "apiKey", | |||
"Category": "Social", | |||
"Description": "Data about Meetups from Meetupcom", | |||
"Description": "Data about Meetups from Meetup.com", | |||
"HTTPS": true, | |||
"Link": "https://www.meetup.com/meetup_api/" | |||
}, | |||