Browse Source

Replace Ruby link validation with Python

pull/569/head
Dave Machado 6 years ago
parent
commit
3c12e3a7c9
No known key found for this signature in database GPG Key ID: 948D4778D01A7B3F
2 changed files with 53 additions and 81 deletions
  1. +53
    -0
      build/validate_links.py
  2. +0
    -81
      build/validate_links.rb

+ 53
- 0
build/validate_links.py View File

@@ -0,0 +1,53 @@
#!/usr/bin/env python3

import httplib2
import json
import socket
import sys


def parse_links(filename):
    """Return the links listed in a JSON file.

    Args:
        filename: Path to a JSON document that has a top-level 'entries'
            list whose items each carry a 'Link' string.

    Returns:
        A list with one dict per entry, in file order. Each dict has the
        keys 'link' (the URL string) and 'https' (True when the URL starts
        with 'https', False otherwise).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file does not contain valid JSON.
        KeyError: If 'entries' or an entry's 'Link' key is missing.
    """
    # The context manager closes the handle even when parsing fails; JSON
    # is read as UTF-8 explicitly instead of the platform default encoding.
    with open(filename, encoding='utf-8') as json_file:
        data = json.load(json_file)
    return [
        {
            'link': entry['Link'],
            'https': entry['Link'].startswith('https'),
        }
        for entry in data['entries']
    ]


def validate_links(links):
    """Send a HEAD request to every link and collect the failures.

    Args:
        links: A list of dicts that each have a 'link' key holding the URL
            to check (the shape produced by parse_links).

    Returns:
        A list of error strings, empty when every link responded with a
        status below 400. Formats:
            '<status>: <link>'        HTTP client or server error
            'TMO: <link>'             request timed out
            'SOC: <error> : <link>'   other socket-level failure
            'ERR: <error> : <link>'   failure raised by httplib2 itself
    """
    print('Validating {} links...'.format(len(links)))
    errors = []
    for each in links:
        link = each['link']
        # Certificate validation is switched off, so a link with an invalid
        # certificate is still requested; each request gets a 5 s timeout.
        h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5)
        try:
            resp = h.request(link, 'HEAD')
            code = int(resp[0]['status'])
            # 4xx are client errors and 5xx are server errors; the previous
            # threshold of 404 silently accepted 400-403.
            if code >= 400:
                errors.append('{}: {}'.format(code, link))
        except (TimeoutError, socket.timeout):
            # socket.timeout only became an alias of TimeoutError in
            # Python 3.10; naming both keeps timeouts out of the SOC branch
            # on older interpreters.
            errors.append("TMO: " + link)
        except socket.error as socketerror:
            errors.append("SOC: {} : {}".format(socketerror, link))
        except httplib2.HttpLib2Error as liberror:
            # Library-level failures (e.g. server not found, redirect limit)
            # are recorded instead of aborting the whole validation run.
            errors.append("ERR: {} : {}".format(liberror, link))
    return errors

if __name__ == "__main__":
    # Script entry point: expects the path of the JSON file as the first
    # command-line argument and exits with status 1 when the argument is
    # missing or when any link fails validation.
    if len(sys.argv) < 2:
        print("No .json file passed")
        sys.exit(1)
    errors = validate_links(parse_links(sys.argv[1]))
    if errors:
        # One failure per line, in the order they were detected.
        print("\n".join(errors))
        sys.exit(1)


+ 0
- 81
build/validate_links.rb View File

@@ -1,81 +0,0 @@
#!/usr/bin/env ruby
require 'httparty'
require 'ruby-progressbar'
require 'uri'
# Link checker: extracts every http/https URL from the file named by the
# first command-line argument, reports duplicates, then GETs each unique URL
# and reports those whose response code is not in allowed_codes.
# Exits 0 when nothing failed (or nothing was found), 1 otherwise.

# Response codes that count as a valid link.
allowed_codes = [200, 302, 403, 429]
# URLs that are never requested.
allowed_links = ["https://www.yelp.com/developers/documentation/v3"]

filename = ARGV[0]
# Without a filename File.open(nil) would raise a bare TypeError.
abort("usage: validate_links.rb <file>") if filename.nil?

contents = File.open(filename, 'rb') { |f| f.read }

# Remove a single trailing ')' from entry URLs.
links = URI.extract(contents, ['http', 'https']).map { |link| link.chomp(')') }

if links.empty?
  puts "no links to check"
  exit(0)
end

# Fail on any duplicate elements. One counting pass replaces the former
# links.count call per element, which was quadratic in the number of links.
occurrences = Hash.new(0)
links.each { |link| occurrences[link] += 1 }
fails = occurrences.select { |_link, seen| seen > 1 }.keys.map { |link| "(DUP): #{link}" }

# Remove any duplicates from array
links = links.uniq
count = 0
total = links.length
progressbar = ProgressBar.create(:total => total,
                                 :format => "%a %P% | Processed: %c from %C")

# GET each link and check for valid response code from allowed_codes
links.each do |link|
  begin
    count += 1
    next if allowed_links.include?(link)

    res = HTTParty.get(link, timeout: 10)
    if res.code.nil?
      fails.push("(NIL): #{link}")
    elsif !allowed_codes.include?(res.code)
      fails.push("(#{res.code}): #{link}")
    end
  rescue HTTParty::RedirectionTooDeep
    fails.push("(RTD): #{link}")
  rescue Net::ReadTimeout, Net::OpenTimeout
    fails.push("(TMO): #{link}")
  rescue OpenSSL::SSL::SSLError
    fails.push("(SSL): #{link}")
  rescue SocketError
    fails.push("(SOK): #{link}")
  rescue Errno::ECONNREFUSED
    fails.push("(CON): #{link}")
  rescue Errno::ECONNRESET
    # Connection resets are not recorded as failures.
  ensure
    # Runs on every path, including `next`, so the bar always reaches the
    # total; previously skipped links never advanced it.
    progressbar.increment
  end
end

puts "#{count}/#{total} links checked"
if fails.empty?
  puts "all links valid"
  exit(0)
else
  puts "-- RESULTS --"
  fails.sort!
  fails.each do |e|
    puts e
  end
  exit(1)
end

Loading…
Cancel
Save