#!/usr/bin/env python3
"""Check that every link listed in a JSON entries file is reachable.

Usage: validate_links.py <entries.json>

The JSON document must contain a top-level ``entries`` list whose items each
carry a ``Link`` URL.  Every URL is probed with an HTTP ``HEAD`` request; each
failure is printed on its own line and the process then exits with status 1.
"""

import json
import socket
import sys

# Lowest HTTP status code that is reported as a failure; lower codes
# (including 400-403) are accepted.
# NOTE(review): the original inline comment spoke of "client or server
# error", which would be >= 400.  The 404 threshold is preserved unchanged --
# confirm which of the two was intended.
ERROR_STATUS_THRESHOLD = 404


def parse_links(filename):
    """Return the links found in the JSON file *filename*.

    Args:
        filename: Path to a JSON document with a top-level ``entries`` list
            in which every entry has a ``Link`` key.

    Returns:
        A list with one dict per entry, each holding ``link`` (the URL
        string) and ``https`` (True when the URL starts with ``https``).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file content is not valid JSON.
        KeyError: If ``entries`` or an entry's ``Link`` key is missing.
    """
    # The context manager closes the handle promptly, and the explicit
    # encoding avoids depending on the platform's default text encoding.
    with open(filename, encoding='utf-8') as json_file:
        data = json.load(json_file)
    return [
        {'link': entry['Link'], 'https': entry['Link'].startswith('https')}
        for entry in data['entries']
    ]


def validate_links(links, timeout=5):
    """Probe each link with an HTTP HEAD request and collect the failures.

    Args:
        links: List of dicts as returned by ``parse_links``; only the
            ``link`` key is read.
        timeout: Per-request socket timeout in seconds.

    Returns:
        A list of error strings, empty when every link answered with a
        status code below ``ERROR_STATUS_THRESHOLD``.  Entries are prefixed
        with the status code, ``TMO`` (timeout), ``SOC`` (socket-level
        error) or ``ERR`` (error raised by httplib2 itself).
    """
    # Imported here so that parse_links() and the usage check keep working
    # when the third-party httplib2 package is not installed.
    import httplib2

    print('Validating {} links...'.format(len(links)))
    errors = []
    for each in links:
        link = each['link']
        # NOTE(security): certificate validation is disabled, so a link
        # served with an invalid certificate still counts as live.
        h = httplib2.Http(disable_ssl_certificate_validation=True,
                          timeout=timeout)
        try:
            response, _ = h.request(link, 'HEAD')
            code = int(response['status'])
        except (TimeoutError, socket.timeout):
            # socket.timeout only became an alias of TimeoutError in
            # Python 3.10; name both so older interpreters report "TMO".
            errors.append("TMO: " + link)
        except socket.error as socketerror:
            errors.append("SOC: {} : {}".format(socketerror, link))
        except httplib2.HttpLib2Error as liberror:
            # e.g. an unresolvable host or too many redirects; record the
            # failure instead of aborting the whole run.
            errors.append("ERR: {} : {}".format(liberror, link))
        else:
            if code >= ERROR_STATUS_THRESHOLD:
                errors.append('{}: {}'.format(code, link))
    return errors


def main(argv=None):
    """Run the validator from the command line.

    Args:
        argv: Argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        The process exit status: 0 when every link passed, 1 when no JSON
        path was given or at least one link failed.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 2:
        print("No .json file passed")
        return 1
    errors = validate_links(parse_links(argv[1]))
    for err in errors:
        print(err)
    return 1 if errors else 0


if __name__ == "__main__":
    sys.exit(main())
#!/usr/bin/env ruby
# Validate every http/https URL found in the file named by the first
# command-line argument.
#
# Each distinct URL is fetched with a GET request. Duplicated URLs, responses
# whose status code is not in allowed_codes, and connection-level failures are
# collected and printed; the script exits 1 when any were found, 0 otherwise.
require 'httparty'
require 'ruby-progressbar'
require 'uri'

# Status codes that count as a live link.
allowed_codes = [200, 302, 403, 429]
# URLs that are never requested.
allowed_links = ["https://www.yelp.com/developers/documentation/v3"]

filename = ARGV[0]
contents = File.open(filename, 'rb') { |f| f.read }

# Remove a single trailing ')' from extracted URLs.
links = URI.extract(contents, ['http', 'https']).map do |link|
  link.end_with?(')') ? link[0...-1] : link
end

if links.empty?
  puts "no links to check"
  exit(0)
end

fails = []

# Fail on any URL that appears more than once.
links.select { |link| links.count(link) > 1 }.uniq.each do |link|
  fails.push("(DUP): #{link}")
end

# Request each distinct URL only once.
links = links.uniq
count = 0
total = links.length
progressbar = ProgressBar.create(:total => total,
                                 :format => "%a %P% | Processed: %c from %C")

# GET each link and check for a valid response code from allowed_codes.
links.each do |link|
  begin
    count += 1
    next if allowed_links.include?(link)

    res = HTTParty.get(link, timeout: 10)
    if res.code.nil?
      fails.push("(NIL): #{link}")
    elsif !allowed_codes.include?(res.code)
      fails.push("(#{res.code}): #{link}")
    end
  rescue HTTParty::RedirectionTooDeep
    fails.push("(RTD): #{link}")
  rescue Net::ReadTimeout, Net::OpenTimeout
    fails.push("(TMO): #{link}")
  rescue OpenSSL::SSL::SSLError
    fails.push("(SSL): #{link}")
  rescue SocketError
    fails.push("(SOK): #{link}")
  rescue Errno::ECONNREFUSED
    fails.push("(CON): #{link}")
  rescue Errno::ECONNRESET
    # A connection reset is tolerated: the link is not recorded as a failure.
  ensure
    # Advance the bar for every link -- including allow-listed, nil-status and
    # reset ones -- so the bar reaches `total` in step with `count`.
    progressbar.increment
  end
end

puts "#{count}/#{total} links checked"
if fails.empty?
  puts "all links valid"
  exit(0)
end

puts "-- RESULTS --"
puts fails.sort
exit(1)