From 29351783e918fd9eb5d36142fef41916ef1d8e5d Mon Sep 17 00:00:00 2001 From: Sitram Date: Thu, 15 Oct 2020 21:50:42 +0300 Subject: [PATCH] Remove broken links and fix python script (#1418) --- README.md | 15 +-------------- build/validate_links.py | 8 ++++---- 2 files changed, 5 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 3676a6ad..f9d1c0d0 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ API | Description | Auth | HTTPS | CORS | API | Description | Auth | HTTPS | CORS | |---|---|---|---|---| | [AbuseIPDB](https://docs.abuseipdb.com/) | IP/domain/URL reputation | `apiKey` | Yes | Unknown | -| [AlienVault Open Threat Exchange (OTX)](https://otx.alienvault.com/api/) | IP/domain/URL reputation | `apiKey` | Yes | Unknown | +| [AlienVault Open Threat Exchange (OTX)](https://otx.alienvault.com/api) | IP/domain/URL reputation | `apiKey` | Yes | Unknown | | [Google Safe Browsing](https://developers.google.com/safe-browsing/) | Google Link/Domain Flagging | `apiKey` | Yes | Unknown | | [Metacert](https://metacert.com/) | Metacert Link Flagging | `apiKey` | Yes | Unknown | | [URLScan.io](https://urlscan.io/about-api/) | Scan and Analyse URLs | `apiKey` | Yes | Unknown | @@ -206,7 +206,6 @@ API | Description | Auth | HTTPS | CORS | | [Livecoin](https://www.livecoin.net/api) | Cryptocurrency Exchange | No | Yes | Unknown | | [MercadoBitcoin](https://www.mercadobitcoin.net/api-doc/) | Brazilian Cryptocurrency Information | No | Yes | Unknown | | [Nexchange](https://nexchange2.docs.apiary.io/) | Automated cryptocurrency exchange service | No | No | Yes | -| [NiceHash](https://docs.nicehash.com/) | Largest Crypto Mining Marketplace | `apiKey` | Yes | Unknown | | [Poloniex](https://poloniex.com/support/api/) | US based digital asset exchange | `apiKey` | Yes | Unknown | | [WorldCoinIndex](https://www.worldcoinindex.com/apiservice) | Cryptocurrencies Prices | `apiKey` | Yes | Unknown | @@ -257,7 +256,6 @@ API | Description | Auth | HTTPS | CORS | | [CountAPI](https://countapi.xyz) | Free and simple counting service. You can use it to track page hits and specific events | No | Yes | Yes | | [DigitalOcean Status](https://status.digitalocean.com/api) | Status of all DigitalOcean services | No | Yes | Unknown | | [DomainDb Info](https://api.domainsdb.info/) | Domain name search to find all domains containing particular words/phrases/etc | No | Yes | Unknown | -| [Faceplusplus](https://www.faceplusplus.com/) | A tool to detect face | `OAuth` | Yes | Unknown | | [Genderize.io](https://genderize.io) | Estimates a gender from a first name | No | Yes | Yes | | [GitHub](https://docs.github.com/en/free-pro-team@latest/rest) | Make use of GitHub repositories, code and user info programmatically | `OAuth` | Yes | Yes | | [Gitlab](https://docs.gitlab.com/ee/api/) | Automate GitLab interaction programmatically | `OAuth` | Yes | Unknown | @@ -375,7 +373,6 @@ API | Description | Auth | HTTPS | CORS | |---|---|---|---|---| | [Age of Empires II](https://age-of-empires-2-api.herokuapp.com) | Get information about Age of Empires II resources | No | Yes | Unknown | | [AmiiboAPI](http://www.amiiboapi.com/) | Amiibo Information | No | No | Yes | -| [Battle.net](https://dev.battle.net/) | Blizzard Entertainment | `apiKey` | Yes | Unknown | | [Chuck Norris Database](http://www.icndb.com/api/) | Jokes | No | No | Unknown | | [Clash of Clans](https://developer.clashofclans.com) | Clash of Clans Game Information | `apiKey` | Yes | Unknown | | [Clash Royale](https://developer.clashroyale.com) | Clash Royale Game Information | `apiKey` | Yes | Unknown | @@ -436,7 +433,6 @@ API | Description | Auth | HTTPS | CORS | | [Google Maps](https://developers.google.com/maps/) | Create/customize digital maps based on Google Maps data | `apiKey` | Yes | Unknown | | [HelloSalut](https://www.fourtonfish.com/hellosalut/hello/) | Get hello translation following user language | No | Yes | Unknown | | [HERE Maps](https://developer.here.com) | Create/customize digital maps based on HERE Maps data | `apiKey` | Yes | Unknown | -| [Indian Cities](https://indian-cities-api-nocbegfhqg.now.sh/) | Get all Indian cities in a clean JSON Format | No | Yes | Yes | | [IP 2 Country](https://ip2country.info) | Map an IP to a country | No | Yes | Unknown | | [IP Address Details](https://ipinfo.io/) | Find geolocation with ip address | No | Yes | Unknown | | [IP Location](http://ip-api.com/) | Find location with ip address | No | No | Unknown | @@ -599,7 +595,6 @@ API | Description | Auth | HTTPS | CORS | |---|---|---|---|---| | [18F](http://18f.github.io/API-All-the-X/) | Unofficial US Federal Government API Development | No | No | Unknown | | [Archive.org](https://archive.readme.io/docs) | The Internet Archive | No | Yes | Unknown | -| [ARSAT](https://datos.arsat.com.ar/developers/) | ARSAT public data | `apiKey` | Yes | Unknown | | [Callook.info](https://callook.info) | United States ham radio callsigns | No | Yes | Unknown | | [CARTO](https://carto.com/) | Location Information Prediction | `apiKey` | Yes | Unknown | | [CivicFeed](https://developers.civicfeed.com/) | News articles and public datasets | `apiKey` | Yes | Unknown | @@ -628,7 +623,6 @@ API | Description | Auth | HTTPS | CORS | | [Countly](https://api.count.ly/reference) | Countly web analytics | No | No | Unknown | | [Drupal.org](https://www.drupal.org/drupalorg/docs/api) | Drupal.org | No | Yes | Unknown | | [Evil Insult Generator](https://evilinsult.com/api) | Evil Insults | No | Yes | Yes | -| [Libraries.io](https://libraries.io/api) | Open source software libraries | `apiKey` | Yes | Unknown | **[⬆ Back to Index](#index)** ### Patent @@ -743,7 +737,6 @@ API | Description | Auth | HTTPS | CORS | | [Instagram](https://www.instagram.com/developer/) | Instagram Login, Share on Instagram, Social Plugins and more | `OAuth` | Yes | Unknown | | [LinkedIn](https://developer.linkedin.com/docs/rest-api) | The foundation of all digital integrations with LinkedIn | `OAuth` | Yes | Unknown | | [Meetup.com](https://www.meetup.com/meetup_api/) | Data about Meetups from Meetup.com | `apiKey` | Yes | Unknown | -| [Mixer](https://dev.mixer.com/) | Game Streaming API | `OAuth` | Yes | Unknown | | [MySocialApp](https://mysocialapp.io) | Seamless Social Networking features, API, SDK to any app | `apiKey` | Yes | Unknown | | [Open Collective](https://docs.opencollective.com/help/developers/api) | Get Open Collective data | No | Yes | Unknown | | [Pinterest](https://developers.pinterest.com/) | The world's catalog of ideas | `OAuth` | Yes | Unknown | @@ -772,7 +765,6 @@ API | Description | Auth | HTTPS | CORS | | [Football-Data.org](http://api.football-data.org/index) | Football Data | No | No | Unknown | | [JCDecaux Bike](https://developer.jcdecaux.com/) | JCDecaux's self-service bicycles | `apiKey` | Yes | Unknown | | [NBA Stats](https://any-api.com/nba_com/nba_com/docs/API_Description) | Current and historical NBA Statistics | No | Yes | Unknown | -| [NFL Arrests](http://nflarrest.com/api/) | NFL Arrest Data | No | No | Unknown | | [NHL Records and Stats](https://gitlab.com/dword4/nhlapi) | NHL historical data and statistics | No | Yes | Unknown | | [Strava](https://strava.github.io/api/) | Connect with athletes, activities and more | `OAuth` | Yes | Unknown | | [SuredBits](https://suredbits.com/api/) | Query sports data, including teams, players, games, scores and statistics | No | No | No | @@ -783,7 +775,6 @@ API | Description | Auth | HTTPS | CORS | ### Test Data API | Description | Auth | HTTPS | CORS | |---|---|---|---|---| -| [Adorable Avatars](http://avatars.adorable.io) | Generate random cartoon avatars | No | Yes | Unknown | | [Bacon Ipsum](https://baconipsum.com/json-api/) | A Meatier Lorem Ipsum Generator | No | Yes | Unknown | | [Dicebear Avatars](https://avatars.dicebear.com/) | Generate random pixel-art avatars | No | Yes | No | | [FakeJSON](https://fakejson.com) | Service to generate test and fake data | `apiKey` | Yes | Yes | @@ -823,12 +814,10 @@ API | Description | Auth | HTTPS | CORS | |---|---|---|---|---| | [ADS-B Exchange](https://www.adsbexchange.com/data/) | Access real-time and historical data of any and all airborne aircraft | No | Yes | Unknown | | [AIS Hub](http://www.aishub.net/api) | Real-time data of any marine and inland vessel equipped with AIS tracking system | `apiKey` | No | Unknown | -| [AIS Web](http://www.aisweb.aer.mil.br/api/doc/index.cfm) | Aeronautical information in digital media produced by the Department of Airspace Control (DECEA) | `apiKey` | No | Unknown | | [Amadeus Travel Innovation Sandbox](https://sandbox.amadeus.com/) | Travel Search - Limited usage | `apiKey` | Yes | Unknown | | [Bay Area Rapid Transit](http://api.bart.gov) | Stations and predicted arrivals for BART | `apiKey` | No | Unknown | | [BlaBlaCar](https://dev.blablacar.com) | Search car sharing trips | `apiKey` | Yes | Unknown | | [Community Transit](https://github.com/transitland/transitland-datastore/blob/master/README.md#api-endpoints) | Transitland API | No | Yes | Unknown | -| [Goibibo](https://developer.goibibo.com/docs) | API for travel search | `apiKey` | Yes | Unknown | | [GraphHopper](https://graphhopper.com/api/1/docs/) | A-to-B routing with turn-by-turn instructions | `apiKey` | Yes | Unknown | | [Icelandic APIs](http://docs.apis.is/) | Open APIs that deliver services in or regarding Iceland | No | Yes | Unknown | | [Izi](http://api-docs.izi.travel/) | Audio guide for travellers | `apiKey` | Yes | Unknown | @@ -856,8 +845,6 @@ API | Description | Auth | HTTPS | CORS | | [Transport for Lisbon, Portugal](https://emel.city-platform.com/opendata/) | Data about buses routes, parking and traffic | `apiKey` | Yes | Unknown | | [Transport for London, England](https://api.tfl.gov.uk) | TfL API | No | Yes | Unknown | | [Transport for Manchester, England](https://developer.tfgm.com/) | TfGM transport network data | `apiKey` | Yes | No | -| [Transport for New York City, US](http://datamine.mta.info/) | MTA | `apiKey` | No | Unknown | -| [Transport for Norway](http://reisapi.ruter.no/help) | Norwegian transport API | No | No | Unknown | | [Transport for Paris, France](http://restratpws.azurewebsites.net/swagger/) | Live schedules made simple | No | No | Unknown | | [Transport for Paris, France](http://data.ratp.fr/api/v1/console/datasets/1.0/search/) | RATP Open Data API | No | No | Unknown | | [Transport for Philadelphia, US](http://www3.septa.org/hackathon/) | SEPTA APIs | No | No | Unknown | diff --git a/build/validate_links.py b/build/validate_links.py index 44c97ffb..bc2cdff1 100755 --- a/build/validate_links.py +++ b/build/validate_links.py @@ -11,9 +11,9 @@ def parse_links(filename): with open(filename) as fp: data = fp.read() raw_links = re.findall( - 'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', + '((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))', data) - links = [raw_link.replace(')', '') for raw_link in raw_links] + links = [raw_link[0] for raw_link in raw_links] return links @@ -22,9 +22,9 @@ def validate_links(links): print('Validating {} links...'.format(len(links))) errors = [] for link in links: - h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=5) + h = httplib2.Http(disable_ssl_certificate_validation=True, timeout=10) try: - resp = h.request(link, 'HEAD') + resp = h.request(link) code = int(resp[0]['status']) # check if status code is a client or server error if code >= 404: