|
@@ -0,0 +1,42 @@ |
|
|
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
|
|
|
|
|
|
import sys |
|
|
|
|
|
import re |
|
|
|
|
|
from typing import List |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_links_in_text(text: str) -> List[str]: |
|
|
|
|
|
"""Find links in a text and return a list of URLs.""" |
|
|
|
|
|
|
|
|
|
|
|
link_pattern = re.compile(r'((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'\".,<>?«»“”‘’]))') |
|
|
|
|
|
|
|
|
|
|
|
raw_links = re.findall(link_pattern, text) |
|
|
|
|
|
|
|
|
|
|
|
links = [ |
|
|
|
|
|
str(raw_link[0]).rstrip('/') for raw_link in raw_links |
|
|
|
|
|
] |
|
|
|
|
|
|
|
|
|
|
|
return links |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def find_links_in_file(filename: str) -> List[str]: |
|
|
|
|
|
"""Find links in a file and return a list of URLs from text file.""" |
|
|
|
|
|
|
|
|
|
|
|
with open(filename, mode='r', encoding='utf-8') as file: |
|
|
|
|
|
readme = file.read() |
|
|
|
|
|
index_section = readme.find('## Index') |
|
|
|
|
|
content = readme[index_section:] |
|
|
|
|
|
|
|
|
|
|
|
links = find_links_in_text(content) |
|
|
|
|
|
|
|
|
|
|
|
return links |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
|
|
|
|
num_args = len(sys.argv) |
|
|
|
|
|
|
|
|
|
|
|
if num_args < 2: |
|
|
|
|
|
print('No .md file passed') |
|
|
|
|
|
sys.exit(1) |
|
|
|
|
|
|
|
|
|
|
|
links = find_links_in_file(sys.argv[1]) |