Browse Source

Merge pull request #569 from davemachado/update-md2json

Update md2json, validate_format, and validate_links
pull/571/head
Dave Machado 6 years ago
committed by GitHub
parent
commit
e9e6a71d50
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 226 additions and 407 deletions
  1. +2
    -5
      .travis.yml
  2. +1
    -2
      build/build.sh
  3. +0
    -63
      build/condenseMd.js
  4. +1
    -1
      build/main.sh
  5. +0
    -134
      build/md2json.js
  6. +48
    -0
      build/md2json.py
  7. +111
    -0
      build/validate_format.py
  8. +0
    -111
      build/validate_format.rb
  9. +53
    -0
      build/validate_links.py
  10. +0
    -81
      build/validate_links.rb
  11. +10
    -10
      json/entries.json

+ 2
- 5
.travis.yml View File

@@ -1,10 +1,7 @@
language: node_js
language: python
python: "3.6"
notifications:
email: true
before_install:
- rvm install 2.4.0
install:
- gem install httparty ruby-progressbar
before_script:
- cd build
script:


+ 1
- 2
build/build.sh View File

@@ -3,7 +3,6 @@
# create json directory if not already present
mkdir -p ../json
# parse API README and print (minified) JSON to stdout, redirect to /json
node condenseMd.js ../README.md > single_table.md
node md2json.js single_table.md > ../json/entries.min.json
./md2json.py ../README.md > ../json/entries.min.json
# beautify the previously created JSON file, redirect to /json
python -m json.tool ../json/entries.min.json > ../json/entries.json

+ 0
- 63
build/condenseMd.js View File

@@ -1,63 +0,0 @@
fs = require('fs')

/**
 * Collapse every Markdown table that follows an `anchor` heading in
 * `filename` into one table and print it to stdout.
 *
 * Output is "<anchor> entries", a fixed six-column header row and separator,
 * then every table row with the text that followed the anchor on its heading
 * line appended as a final "Category" cell.
 *
 * @param {string} filename - path of the Markdown file to read (as UTF-8).
 * @param {string} anchor - marker identifying a heading line, e.g. "###".
 */
function setupMd(filename, anchor) {
    fs.readFile(filename, 'utf8', function (err, text) {
        if (err) {
            return console.log(err);
        }
        var lines = text.split("\n");
        var cur_line = 0;
        var line = "";
        var table_name = "";
        var arr = [];

        // Returns the next line, or undefined once the input is exhausted.
        function read_line() {
            return lines[cur_line++];
        }

        while (true) {
            // Scan forward until `line` is a heading or the input runs out.
            while (line.indexOf(anchor) == -1 && cur_line < lines.length) {
                line = read_line();
            }
            // `>=` rather than `==`: the two skips below can step past the end.
            if (cur_line >= lines.length) {
                break;
            }
            table_name = line.split(anchor)[1];
            // Skip the table's header row and its |---| separator row.
            read_line();
            read_line();
            // The bound is checked before reading, so `line` is never
            // undefined and a row on the very last line is still collected.
            while (cur_line < lines.length) {
                line = read_line();
                if (line.length < 2) {
                    break; // a (near-)empty line ends the table
                }
                if (line.indexOf("|") == 0) {
                    arr.push(line + table_name);
                }
            }
        }
        console.log(anchor + " entries");
        console.log("API | Description | Auth | HTTPS | Link | Category");
        console.log("|---|---|---|---|---|---|");
        for (var i = 0; i < arr.length; i++) {
            console.log(arr[i]);
        }
    });
}

if (process.argv.length < 3) {
console.log("No .md file passed!");
return;
}
if (process.argv.length < 4) {
anchorText = "###";
} else {
anchorText = process.argv[3];
}
setupMd(process.argv[2].toString(), anchorText);

+ 1
- 1
build/main.sh View File

@@ -30,7 +30,7 @@ else
fi

echo "running format validation..."
./validate_format.rb $FORMAT_FILE
./validate_format.py $FORMAT_FILE
if [[ $? != 0 ]]; then
echo "format validation failed!"
exit 1


+ 0
- 134
build/md2json.js View File

@@ -1,134 +0,0 @@
fs = require('fs')

/**
 * Trim a raw Markdown table cell and convert it according to its column.
 *
 * @param {string} str - raw cell text, possibly padded with whitespace.
 * @param {number|string} context - column selector: 1 Name, 2 Description,
 *     3 Auth, 4 HTTPS, 5 Link; any other value only trims. Callers pass
 *     for...in keys, so numeric strings are accepted as well as numbers.
 * @returns {string|boolean|null} the trimmed text; for Auth, null when the
 *     cell is "No" (any case), otherwise the text with the first two
 *     backticks removed; for HTTPS, true only for "Yes" (any case); for
 *     Link, the URL taken out of "[Go!](URL)".
 */
function md_trim(str, context) {
    var value = str.trim();

    switch (Number(context)) {
    case 1: // Name
        // placeholder for any formatting on name value
        return value;
    case 2: // Description
        // Drop only a trailing period. Removing the first "." anywhere in
        // the text turned "Regulations.gov" into "Regulationsgov".
        return value.replace(/\.$/, "");
    case 3: // Auth
        if (value.toUpperCase() == "NO") {
            return null;
        }
        return value.replace("`", "").replace("`", "");
    case 4: // HTTPS
        return value.toUpperCase() == "YES";
    case 5: // Link
        // "[Go!](URL)" -> "(URL)" -> "URL"
        return value.replace("[Go!]", "").slice(1, -1);
    default:
        return value;
    }
}

/**
 * Read a Markdown file and print its tables to stdout as one JSON object.
 *
 * For every line containing `anchor`, the trimmed text after the anchor is
 * used as a key of the result; its value is an array with one object per
 * table row, keyed by the column titles of that table's header row. The
 * result also carries "count", the number of rows read across all tables.
 *
 * @param {string} filename - path of the Markdown file (read as UTF-8).
 * @param {string} anchor - marker identifying a heading line, e.g. "###".
 */
function handle(filename, anchor) {
fs.readFile(filename, 'utf8', function (err,text) {
if (err) {
return console.log(err);
}
var lines = text.split("\n");
var cur_line = 0;
var line = ""
var table_name = "";
var col_num = 0;
var cols = [];
var rows = [];
var entry_count = 0;

// Returns the next line and advances the cursor; undefined past the end.
function read_line() {
return lines[cur_line++];
}
var root = {};
while (true) {
var cols = [];
var rows = [];
// Advance until `line` contains the anchor or the last line has been read.
while (line.indexOf(anchor) == -1 && cur_line != lines.length) {
line = read_line();
}
if (cur_line == lines.length) {
break;
}
table_name = line.split(anchor)[1];
table_name = md_trim(table_name, 0)

// Header row: the column titles.
line = read_line()

if (line) {
line = line.split("|")
for (var j in line) {

// Context 0 only trims; the trimmed title is written back into `line`.
line[j] = md_trim(line[j], 0)
if ((j == 0 || j == line.length - 1) && line[j] === "") {
// empty cell produced by a leading/trailing pipe: not pushed
} else {
cols.push(line[j]);
}
}
if (line.length) {
// NOTE(review): this replaces the filtered `cols` built above with the
// full trimmed split, so empty edge cells stay in the title row.
cols = line;
rows.push(cols)
} else {
console.error("markdown expect column title")
break;
}
} else {
console.error("markdown expect table content")
break;
}

// Separator row: only required to be non-empty, its content is ignored.
line = read_line()

if (!line) {
console.error("markdown expect table spliter")
break;
}
line = read_line()
// NOTE(review): read_line() returns undefined past the end of input, so
// `line.indexOf` throws if the separator was the final line. The
// `cur_line != lines.length` test also stops before the row on the file's
// last line is processed.
while (line.indexOf("|") != -1 && cur_line != lines.length) {

var line_this = line.split("|")
var row = []
for (var j in line_this) {
// `j` is the cell's index in the split (a string key), used by md_trim
// as the column selector.
line_this[j] = md_trim(line_this[j], j)
if ((j == 0 || j == line_this.length - 1) && line_this[j] === "") {
// empty cell produced by a leading/trailing pipe: not pushed
} else {
row.push(line_this[j]);
}

}
rows.push(row);
entry_count++;
line = read_line()
}

// rows[0] holds the column titles; every later row becomes one object.
var data=[];
for (var j in rows) {
if (j != 0) {
var ele = {};
for (var k in rows[j]) {
ele[rows[0][k]] = rows[j][k];
}
data.push(ele);
}
}
// entry_count is never reset, so "count" is the running total of rows.
root["count"] = entry_count;
root[table_name] = data;
}
console.log(JSON.stringify(root));
});
}

if (process.argv.length < 3) {
console.log("No .md file passed!");
return;
}
if (process.argv.length < 4) {
anchorText = "###";
} else {
anchorText = process.argv[3];
}
handle(process.argv[2].toString(), anchorText);

+ 48
- 0
build/md2json.py View File

@@ -0,0 +1,48 @@
#!/usr/bin/env python3

import json
import sys


def markdown_to_json(filename, anchor):
    """Convert the tables of a Markdown file into a JSON string.

    Lines starting with ``anchor`` set the current category (the text after
    the anchor). Lines starting with ``'| '`` are table rows whose first five
    cells are API, Description, Auth, HTTPS and Link.

    Returns a JSON object string with ``count`` (number of entries) and
    ``entries`` (one object per row, including its ``Category``).

    Raises IndexError if a row has fewer than five cells.
    """
    category = ""
    entries = []
    # Decode as UTF-8 explicitly instead of relying on the locale default,
    # which can be ASCII on CI machines.
    with open(filename, encoding='utf-8') as fp:
        for raw_line in fp:
            line = raw_line.rstrip()
            if not line:
                continue
            if line.startswith(anchor):
                category = line.split(anchor)[1].strip()
                continue
            if not line.startswith('| '):
                # header rows, |---| separators and prose are ignored
                continue
            # The outer split results are the text outside the edge pipes.
            chunks = [x.strip() for x in line.split('|')[1:-1]]
            entries.append({
                'API': chunks[0],
                'Description': chunks[1],
                'Auth': None if chunks[2].upper() == 'NO' else chunks[2].strip('`'),
                'HTTPS': chunks[3].upper() == 'YES',
                # "[Go!](URL)" -> "(URL)" -> "URL"
                'Link': chunks[4].replace('[Go!]', '')[1:-1],
                'Category': category,
            })
    final = {
        'count': len(entries),
        'entries': entries,
    }
    return json.dumps(final)


def main():
    """Command-line entry point: md2json.py <file.md> [anchor].

    Prints the JSON produced by markdown_to_json to stdout. Exits with
    status 1 when no file argument is given. The anchor defaults to '###'.
    """
    args = sys.argv[1:]
    if not args:
        print("No .md file passed")
        sys.exit(1)
    anchor = args[1] if len(args) > 1 else '###'
    print(markdown_to_json(args[0], anchor))


if __name__ == "__main__":
    main()

+ 111
- 0
build/validate_format.py View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python3

import json
import string
import sys

# Marker that starts a category heading line.
anchor = '###'
# Values accepted in the Auth column (compared after backticks are removed).
auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'No']
# Characters a Description must not end with.
punctuation = ['.', '?', '!']
# Values accepted in the HTTPS column.
https_keys = ['Yes', 'No']

# Zero-based positions of the columns within a table row's cells.
index_title = 0
index_desc = 1
index_auth = 2
index_https = 3
index_link = 4

# Error messages accumulated by add_error() and printed by main().
errors = []


def add_error(line_num, message):
    """Append a formatted message to the module-level error list.

    ``line_num`` is zero-based; it is shown one-based and zero-padded to
    three digits, e.g. ``(L007) message``.
    """
    errors.append(f'(L{line_num + 1:03d}) {message}')


def check_format(filename):
    """
    Validate the formatting of every Markdown table row in ``filename``,
    recording each problem through add_error().

    Checks, in order: rows of each section are in alphabetical order by
    title; every cell is padded with exactly one space on each side; the
    Description starts with a capital and does not end with punctuation;
    Auth and HTTPS hold an allowed value; Link has the form "[Go!](http...)".

    Malformed rows (too few columns, empty Description) are reported as
    errors instead of raising.
    """
    with open(filename) as fp:
        lines = [line.rstrip() for line in fp]

    def is_entry(line):
        # table rows start with a pipe; |--- lines are table separators
        return line.startswith('|') and not line.startswith('|---')

    # START Alphabetical Order
    category = None  # None until the first heading has been seen
    sections = {}
    section_line_num = {}
    for line_num, line in enumerate(lines):
        if line.startswith(anchor):
            category = line.split(anchor)[1].strip()
            sections[category] = []
            section_line_num[category] = line_num
            continue
        if not is_entry(line):
            continue
        cells = line.split('|')[1:-1]
        # Rows before the first heading belong to no section, and rows
        # without cells are reported by the entry checks below.
        if category is None or not cells:
            continue
        sections[category].append(cells[0].strip().upper())

    for category, entries in sections.items():
        if sorted(entries) != entries:
            add_error(section_line_num[category], '{} section is not in alphabetical order'.format(category))
    # END Alphabetical Order

    # START Check Entries
    for line_num, line in enumerate(lines):
        if not is_entry(line):
            continue
        segments = line.split('|')[1:-1]
        if len(segments) <= index_link:
            add_error(line_num, "entry must have at least {} columns".format(index_link + 1))
            continue
        # START Global
        for segment in segments:
            # every line segment should start and end with exactly 1 space
            if len(segment) - len(segment.lstrip()) != 1 or len(segment) - len(segment.rstrip()) != 1:
                add_error(line_num, "each segment must start and end with exactly 1 space")
        # END Global
        segments = [seg.strip() for seg in segments]
        # START Description
        desc = segments[index_desc]
        if not desc:
            add_error(line_num, "Description is empty")
        else:
            # first character should be capitalized
            char = desc[0]
            if char.upper() != char:
                add_error(line_num, "first char of Description is not capitalized")
            # last character should not be punctuation
            char = desc[-1]
            if char in punctuation:
                add_error(line_num, "description should not end with {}".format(char))
        # END Description
        # START Auth
        # values should conform to valid options only
        auth = segments[index_auth].replace('`', '')
        if auth not in auth_keys:
            add_error(line_num, "{} is not a valid Auth option".format(auth))
        # END Auth
        # START HTTPS
        # values should conform to valid options only
        https = segments[index_https]
        if https not in https_keys:
            add_error(line_num, "{} is not a valid HTTPS option".format(https))
        # END HTTPS
        # START Link
        # url should be wrapped in '[Go!]()' Markdown syntax
        link = segments[index_link]
        if not link.startswith('[Go!](http') or not link.endswith(')'):
            add_error(line_num, 'link format should be "[Go!](LINK)"')
        # END Link
    # END Check Entries

def main():
    """Command-line entry point: validate_format.py <file.md>.

    Runs check_format on the given file, prints every recorded error on its
    own line and exits with status 1 if there were any. Also exits with
    status 1 when no file argument is given.
    """
    if len(sys.argv) < 2:
        print("No .md file passed")
        sys.exit(1)

    check_format(sys.argv[1])
    if errors:
        print('\n'.join(errors))
        sys.exit(1)


if __name__ == "__main__":
    main()

+ 0
- 111
build/validate_format.rb View File

@@ -1,111 +0,0 @@
#!/usr/bin/env ruby

# Values accepted in the Auth column (compared after backticks are removed).
auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'No']
# Characters a Description must not end with.
punctuation = ['.', '?', '!']
# Values accepted in the HTTPS column.
https_keys = ['Yes', 'No']

# Positions of each column in line.split("|"); index 0 is the text before
# the row's first pipe.
INDEX_TITLE = 1
INDEX_DESCRIPTION = 2
INDEX_AUTH = 3
INDEX_HTTPS = 4
INDEX_LINK = 5
# Markdown file to validate: the first command-line argument.
filename = ARGV[0]
# Global list of error messages, appended to by add_error.
$errors = []

# Appends a formatted error to the global $errors list.
#
# line_num  - line number shown in the message, zero-padded to 3 digits.
# val_index - one of the INDEX_* constants; selects the column name shown.
#             Any other value leaves the column name blank.
# message   - description of the problem.
def add_error(line_num, val_index, message)
  segment =
    case val_index
    when INDEX_TITLE then "Title"
    when INDEX_DESCRIPTION then "Description"
    when INDEX_AUTH then "Auth"
    when INDEX_HTTPS then "HTTPS"
    when INDEX_LINK then "Link"
    end
  # The message is passed as a format argument rather than interpolated into
  # the format string, so a "%" in it (e.g. inside a URL) cannot be read as
  # a format directive.
  $errors.push("(L%03d) %-14.14s %s" % [line_num, segment, message])
end

################### CHECK ALPHABETICAL ORDER ###################
# First pass over the file: collect the upcased title of every table row per
# '###' section, then report each section whose titles are not sorted.
section = ''
sections = []
section_to_line_num = {}
section_to_entries = Hash.new {|h,k| h[k] = Array.new }
File.foreach(filename).with_index do | line, line_num |
if line.start_with?('###')
# Section name: the heading without its first '###', leading whitespace
# stripped and the final character (the line ending) chopped off.
section = line.sub('###', '').lstrip.chop
sections.push(section)
# with_index counts from 0; stored as a 1-based line number.
section_to_line_num[section] = line_num + 1
end
# Skip non-markdown table lines and table schema lines
if !line.start_with?('|') || line.eql?("|---|---|---|---|---|\n")
next
end
# char to check is the first column
check_char = line.split("|")[1].strip.upcase
section_to_entries[section].push(check_char)
end
sections.each do | sect |
if section_to_entries[sect] != section_to_entries[sect].sort
add_error(section_to_line_num[sect], INDEX_TITLE, "#{sect} section is not in alphabetical order")
end
end

#################### CHECK LINE ENTRIES ########################
# Second pass over the file: validate the cells of every table row.
File.foreach(filename).with_index do | line, line_num |
# Convert the 0-based index into a 1-based line number for messages.
line_num += 1
# Skip non-markdown table lines and table schema lines
if !line.start_with?('|') || line.eql?("|---|---|---|---|---|\n")
next
end

# values[0] is the text before the first pipe; cells start at index 1.
values = line.split("|")

################### GLOBAL ###################
values.each.with_index do |val, val_index|
msg = ""
case val_index
when INDEX_TITLE..INDEX_LINK
# every line segment should start and end with exactly 1 space
if val[/\A */].size != 1 || val[/ *\z/].size != 1
add_error(line_num, val_index, "string should start and end with exactly 1 space")
end
end
end
################# DESCRIPTION ################
# First character should be capitalized
# lstrip.chop drops the leading whitespace and the last character
# (presumably the single trailing space checked above).
desc_val = values[INDEX_DESCRIPTION].lstrip.chop
if !/[[:upper:]]/.match(desc_val[0])
add_error(line_num, INDEX_DESCRIPTION, "first char not uppercase")
end
# value should not be punctuated
last_char = desc_val[desc_val.length-1]
if punctuation.include?(last_char)
add_error(line_num, INDEX_DESCRIPTION, "description should not end with \"#{last_char}\"")
end
#################### AUTH ####################
# Values should conform to valid options only
# tr deletes every backtick before the comparison.
auth_val = values[INDEX_AUTH].lstrip.chop.tr('``', '')
if !auth_keys.include?(auth_val)
add_error(line_num, INDEX_AUTH, "not a valid option: #{auth_val}")
end
#################### HTTPS ###################
# Values should be either "Yes" or "No"
https_val = values[INDEX_HTTPS].lstrip.chop
if !https_keys.include?(https_val)
add_error(line_num, INDEX_HTTPS, "must use \"Yes\" or \"No\": #{https_val}")
end
#################### LINK ####################
# Url should be wrapped in "[Go!]" view
link_val = values[INDEX_LINK].lstrip.chop
if !link_val.start_with?("[Go!](") || !link_val.end_with?(')')
add_error(line_num, INDEX_LINK, "format should be \"[Go!](<LINK>)\": #{link_val}")
end
end
# Print every recorded error, one per line.
$errors.each do | e |
  puts e
end
# Exit non-zero when anything failed. The raw error count is not used as the
# status because exit statuses are truncated to 8 bits, so a multiple of 256
# errors would be reported as success.
exit($errors.empty? ? 0 : 1)

+ 53
- 0
build/validate_links.py View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python3

import httplib2
import json
import socket
import sys


def parse_links(filename):
    """Return the links found in a JSON entries file.

    The file must hold an object with an ``entries`` list whose items have a
    ``Link`` string. Each result is a dict with ``link`` (the URL) and
    ``https`` (True when the URL starts with ``https``).
    """
    # Use a context manager so the file handle is closed deterministically.
    with open(filename) as fp:
        data = json.load(fp)
    return [
        {
            'link': entry['Link'],
            'https': entry['Link'].startswith('https'),
        }
        for entry in data['entries']
    ]


def validate_links(links):
    """Send a HEAD request to every link and return the failures.

    ``links`` is a list of dicts with a ``link`` URL, as returned by
    parse_links. The result is a list of strings: ``"<status>: <url>"`` for
    reported status codes, ``"TMO: <url>"`` for timeouts and
    ``"SOC: <error> : <url>"`` for socket errors.
    """
    print('Validating {} links...'.format(len(links)))
    failures = []
    for item in links:
        url = item['link']
        # A new client per link, without certificate validation, 5 s timeout.
        client = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
        try:
            status = int(client.request(url, 'HEAD')[0]['status'])
            # Only statuses from 404 upward are reported; 400-403 pass.
            # NOTE(review): the earlier comment said "client or server
            # error" - confirm whether 400 was the intended threshold.
            if status >= 404:
                failures.append(f'{status}: {url}')
        except TimeoutError:
            failures.append("TMO: " + url)
        except socket.error as exc:
            failures.append("SOC: {} : {}".format(exc, url))
    return failures

# Command-line entry point: validate_links.py <entries.json>
# Prints one line per failing link and exits with status 1 if any failed,
# or if no file argument was given.
if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("No .json file passed")
        sys.exit(1)
    errors = validate_links(parse_links(sys.argv[1]))
    if errors:
        print('\n'.join(errors))
        sys.exit(1)


+ 0
- 81
build/validate_links.rb View File

@@ -1,81 +0,0 @@
#!/usr/bin/env ruby
require 'httparty'
require 'ruby-progressbar'
require 'uri'
# Link checker: extracts every http(s) URL from the file given as the first
# argument, reports duplicates, GETs each unique URL and reports those whose
# response is not acceptable. Exits 0 when all links pass, 1 otherwise.

# Response codes treated as a live link.
allowed_codes = [200, 302, 403, 429]
# Links that are never requested.
allowed_links = ["https://www.yelp.com/developers/documentation/v3"]
args = ARGV
filename = args[0]
contents = File.open(filename, 'rb') { |f| f.read }
raw_links = URI.extract(contents, ['http', 'https'])
# Remove trailing ')' from entry URLs
links = []
raw_links.each do |link|
if link.end_with?(')')
links.push(link[0...-1])
else
links.push(link)
end
end
if links.length <= 0
puts "no links to check"
exit(0)
end
fails = []
# Fail on any duplicate elements
dup = links.select{|element| links.count(element) > 1}
if dup.uniq.length > 0
dup.uniq.each do |e|
fails.push("(DUP): #{e}")
end
end
# Remove any duplicates from array
links = links.uniq
count = 0
total = links.length
progressbar = ProgressBar.create(:total => total,
:format => "%a %P% | Processed: %c from %C")
# GET each link and check for valid response code from allowed_codes
links.each do |link|
begin
# Counted before the allow-list check, so skipped links are included in
# the "links checked" total.
count += 1
if allowed_links.include?(link)
# `next` leaves the block here, so the progress bar is not incremented.
next
end
res = HTTParty.get(link, timeout: 10)
if res.code.nil?
fails.push("(NIL): #{link}")
next
end
if !allowed_codes.include?(res.code)
fails.push("(#{res.code}): #{link}")
end
# Each failure kind is recorded under its own three-letter tag.
rescue HTTParty::RedirectionTooDeep
fails.push("(RTD): #{link}")
rescue Net::ReadTimeout
fails.push("(TMO): #{link}")
rescue Net::OpenTimeout
fails.push("(TMO): #{link}")
rescue OpenSSL::SSL::SSLError
fails.push("(SSL): #{link}")
rescue SocketError
fails.push("(SOK): #{link}")
rescue Errno::ECONNREFUSED
fails.push("(CON): #{link}")
rescue Errno::ECONNRESET
# A connection reset is not recorded as a failure.
next
end
progressbar.increment
end
puts "#{count}/#{total} links checked"
if fails.length <= 0
puts "all links valid"
exit(0)
else
puts "-- RESULTS --"
fails.sort!
fails.each do |e|
puts e
end
exit(1)
end

+ 10
- 10
json/entries.json View File

@@ -197,7 +197,7 @@
"API": "BookNomads",
"Auth": null,
"Category": "Books",
"Description": "Books published in the Netherlands and Flanders (about 25 million), book covers, and related data",
"Description": "Books published in the Netherlands and Flanders (about 2.5 million), book covers, and related data",
"HTTPS": true,
"Link": "https://www.booknomads.com/dev"
},
@@ -765,7 +765,7 @@
"API": "JSONbin.io",
"Auth": "apiKey",
"Category": "Development",
"Description": "Free JSON storage service Ideal for small scale Web apps, Websites and Mobile apps",
"Description": "Free JSON storage service. Ideal for small scale Web apps, Websites and Mobile apps",
"HTTPS": true,
"Link": "https://jsonbin.io"
},
@@ -1741,7 +1741,7 @@
"API": "BusinessUSA",
"Auth": "apiKey",
"Category": "Government",
"Description": "BusinessUSA gives developers access to authoritative information on US. programs, events, services and more",
"Description": "BusinessUSA gives developers access to authoritative information on U.S. programs, events, services and more",
"HTTPS": true,
"Link": "https://business.usa.gov/developer"
},
@@ -1781,7 +1781,7 @@
"API": "Regulations.gov",
"Auth": "apiKey",
"Category": "Government",
"Description": "Regulationsgov provides access to Federal regulatory materials and increases public participation and their understanding of the Federal rule making process",
"Description": "Regulations.gov provides access to Federal regulatory materials and increases public participation and their understanding of the Federal rule making process",
"HTTPS": true,
"Link": "https://regulationsgov.github.io/developers/"
},
@@ -1837,7 +1837,7 @@
"API": "Medicare",
"Auth": null,
"Category": "Health",
"Description": "Access to the data from the CMS - medicaregov",
"Description": "Access to the data from the CMS - medicare.gov",
"HTTPS": true,
"Link": "https://data.medicare.gov/developers"
},
@@ -2477,7 +2477,7 @@
"API": "UPC database",
"Auth": "apiKey",
"Category": "Open Data",
"Description": "More than 15 million barcode numbers from all around the world",
"Description": "More than 1.5 million barcode numbers from all around the world",
"HTTPS": true,
"Link": "https://upcdatabase.org/api"
},
@@ -2517,7 +2517,7 @@
"API": "Drupal.org",
"Auth": null,
"Category": "Open Source Projects",
"Description": "Drupalorg",
"Description": "Drupal.org",
"HTTPS": true,
"Link": "https://www.drupal.org/drupalorg/docs/api"
},
@@ -2733,7 +2733,7 @@
"API": "inspirehep.net",
"Auth": null,
"Category": "Science",
"Description": "High Energy Physics info system",
"Description": "High Energy Physics info. system",
"HTTPS": true,
"Link": "https://inspirehep.net/info/hep/api?ln=en"
},
@@ -2749,7 +2749,7 @@
"API": "Minor Planet Center",
"Auth": null,
"Category": "Science",
"Description": "Asterankcom Information",
"Description": "Asterank.com Information",
"HTTPS": false,
"Link": "http://www.asterank.com/mpc"
},
@@ -2949,7 +2949,7 @@
"API": "Meetup.com",
"Auth": "apiKey",
"Category": "Social",
"Description": "Data about Meetups from Meetupcom",
"Description": "Data about Meetups from Meetup.com",
"HTTPS": true,
"Link": "https://www.meetup.com/meetup_api/"
},


Loading…
Cancel
Save