Browse Source

Filter the links after the index section

pull/1985/head
Matheus Felipe 2 years ago
parent
commit
f921f4ec5d
No known key found for this signature in database. GPG Key ID: AA785C523274872F
1 changed file with 11 additions and 5 deletions
  1. +11
    -5
      build/validate_links.py

+ 11
- 5
build/validate_links.py View File

@@ -14,12 +14,19 @@ ignored_links = [

def parse_links(filename):
    """Return the URLs found after the '## Index' heading of a text file.

    The file is read as UTF-8. Only the text from the first occurrence of
    '## Index' onward is scanned, so links that appear before the index
    section are ignored. If the heading is absent, the whole file is
    scanned instead of silently returning nothing.

    Args:
        filename: Path of the text file to scan.

    Returns:
        A list of URL strings in order of appearance, each with any
        trailing '/' removed. Duplicates are kept.
    """
    with open(filename, mode='r', encoding='utf-8') as fp:
        readme = fp.read()

    # str.find returns -1 when the heading is missing; slicing from -1 would
    # keep only the last character of the file, so fall back to everything.
    index_section = readme.find('## Index')
    content = readme[index_section:] if index_section != -1 else readme

    # Raw string so the regex escapes (\d, \s, \(, ...) reach the regex
    # engine untouched instead of being treated as invalid string escapes.
    raw_links = re.findall(
        r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))',
        content)

    # The pattern has several groups, so findall yields tuples; the first
    # group is the complete URL. Trailing slashes are stripped here so that
    # later comparisons treat 'x/' and 'x' as the same link.
    return [raw_link[0].rstrip('/') for raw_link in raw_links]

def dup_links(links):
@@ -30,7 +37,6 @@ def dup_links(links):
dupes = []

for link in links:
link = link.rstrip('/')
if link in ignored_links:
continue



Loading…
Cancel
Save