Browse Source

Implement functions to find links in a text/file

pull/3011/head
Matheus Felipe 2 years ago
parent
commit
4808d633a1
No known key found for this signature in database. GPG Key ID: AA785C523274872F
1 changed file with 42 additions and 0 deletions
  1. +42
    -0
      scripts/validate/links.py

+ 42
- 0
scripts/validate/links.py View File

@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-

import sys
import re
from typing import List


def find_links_in_text(text: str) -> List[str]:
    """Extract URL-like substrings from *text*.

    A candidate starts with ``http://``/``https://``, a ``www.`` style
    prefix, or a bare ``domain.tld/`` form. Every trailing ``/`` is
    stripped from each hit before it is returned.

    Args:
        text: Arbitrary text to scan.

    Returns:
        The matched URLs in order of appearance (duplicates are kept).
    """

    url_regex = re.compile(r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))')

    found = []
    for match in url_regex.finditer(text):
        # Group 1 wraps the entire URL; the nested groups only capture
        # parenthesised fragments and are not needed here.
        found.append(match.group(1).rstrip('/'))

    return found


def find_links_in_file(filename: str) -> List[str]:
    """Find links in a text file and return them as a list of URLs.

    Only the part of the file starting at the first ``## Index`` heading
    is scanned. If the file has no such heading, the whole file is
    scanned instead.

    Args:
        filename: Path of the UTF-8 encoded text file to read.

    Returns:
        The URLs found by ``find_links_in_text`` in the scanned content.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """

    with open(filename, mode='r', encoding='utf-8') as file:
        readme = file.read()

    index_section = readme.find('## Index')
    # str.find() returns -1 when the marker is missing; slicing with -1
    # would keep only the last character and silently drop every link.
    if index_section == -1:
        index_section = 0
    content = readme[index_section:]

    return find_links_in_text(content)


if __name__ == '__main__':
    # Everything after the script name; empty when no file was given.
    cli_args = sys.argv[1:]

    if not cli_args:
        print('No .md file passed')
        sys.exit(1)

    # Only the first argument is treated as the file to scan.
    links = find_links_in_file(cli_args[0])

Loading…
Cancel
Save