You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

325 lines
9.9 KiB

  1. # -*- coding: utf-8 -*-
  2. import re
  3. import sys
  4. from string import punctuation
  5. from typing import List, Tuple, Dict
  6. from urllib.parse import urlparse
  7. # Temporary replacement
  8. # The descriptions that contain () at the end must adapt to the new policy later
  9. punctuation = punctuation.replace('()', '')
  10. anchor = '###'
  11. auth_keys = ['apiKey', 'OAuth', 'X-Mashape-Key', 'User-Agent', 'No']
  12. https_keys = ['Yes', 'No']
  13. cors_keys = ['Yes', 'No', 'Unknown']
  14. index_title = 0
  15. index_desc = 1
  16. index_auth = 2
  17. index_https = 3
  18. index_cors = 4
  19. index_call = 5
  20. num_segments = 6
  21. min_segments = 5
  22. max_segments = 6
  23. min_entries_per_category = 3
  24. max_description_length = 100
  25. anchor_re = re.compile(anchor + r'\s(.+)')
  26. category_title_in_index_re = re.compile(r'\*\s\[(.*)\]')
  27. link_re = re.compile(r'\[(.+)\]\((http.*)\)')
  28. # Type aliases
  29. APIList = List[str]
  30. Categories = Dict[str, APIList]
  31. CategoriesLineNumber = Dict[str, int]
  32. def error_message(line_number: int, message: str) -> str:
  33. line = line_number + 1
  34. return f'(L{line:03d}) {message}'
  35. def get_categories_content(contents: List[str]) -> Tuple[Categories, CategoriesLineNumber]:
  36. categories = {}
  37. category_line_num = {}
  38. for line_num, line_content in enumerate(contents):
  39. if line_content.startswith(anchor):
  40. category = line_content.split(anchor)[1].strip()
  41. categories[category] = []
  42. category_line_num[category] = line_num
  43. continue
  44. if not line_content.startswith('|') or line_content.startswith('|---'):
  45. continue
  46. raw_title = [
  47. raw_content.strip() for raw_content in line_content.split('|')[1:-1]
  48. ][0]
  49. title_match = link_re.match(raw_title)
  50. if title_match:
  51. title = title_match.group(1).upper()
  52. categories[category].append(title)
  53. return (categories, category_line_num)
  54. def check_alphabetical_order(lines: List[str]) -> List[str]:
  55. err_msgs = []
  56. categories, category_line_num = get_categories_content(contents=lines)
  57. for category, api_list in categories.items():
  58. if sorted(api_list) != api_list:
  59. err_msg = error_message(
  60. category_line_num[category],
  61. f'{category} category is not alphabetical order'
  62. )
  63. err_msgs.append(err_msg)
  64. return err_msgs
  65. def check_title(line_num: int, raw_title: str) -> List[str]:
  66. err_msgs = []
  67. title_match = link_re.match(raw_title)
  68. # url should be wrapped in "[TITLE](LINK)" Markdown syntax
  69. if not title_match:
  70. err_msg = error_message(line_num, 'Title syntax should be "[TITLE](LINK)"')
  71. err_msgs.append(err_msg)
  72. else:
  73. # do not allow "... API" in the entry title
  74. title = title_match.group(1)
  75. if title.upper().endswith(' API'):
  76. err_msg = error_message(line_num, 'Title should not end with "... API". Every entry is an API here!')
  77. err_msgs.append(err_msg)
  78. return err_msgs
  79. def check_description(line_num: int, description: str) -> List[str]:
  80. err_msgs = []
  81. first_char = description[0]
  82. if first_char.upper() != first_char:
  83. err_msg = error_message(line_num, 'first character of description is not capitalized')
  84. err_msgs.append(err_msg)
  85. last_char = description[-1]
  86. if last_char in punctuation:
  87. err_msg = error_message(line_num, f'description should not end with {last_char}')
  88. err_msgs.append(err_msg)
  89. desc_length = len(description)
  90. if desc_length > max_description_length:
  91. err_msg = error_message(line_num, f'description should not exceed {max_description_length} characters (currently {desc_length})')
  92. err_msgs.append(err_msg)
  93. return err_msgs
  94. def check_auth(line_num: int, auth: str) -> List[str]:
  95. err_msgs = []
  96. backtick = '`'
  97. if auth != 'No' and (not auth.startswith(backtick) or not auth.endswith(backtick)):
  98. err_msg = error_message(line_num, 'auth value is not enclosed with `backticks`')
  99. err_msgs.append(err_msg)
  100. if auth.replace(backtick, '') not in auth_keys:
  101. err_msg = error_message(line_num, f'{auth} is not a valid Auth option')
  102. err_msgs.append(err_msg)
  103. return err_msgs
  104. def check_https(line_num: int, https: str) -> List[str]:
  105. err_msgs = []
  106. if https not in https_keys:
  107. err_msg = error_message(line_num, f'{https} is not a valid HTTPS option')
  108. err_msgs.append(err_msg)
  109. return err_msgs
  110. def check_cors(line_num: int, cors: str) -> List[str]:
  111. err_msgs = []
  112. if cors not in cors_keys:
  113. err_msg = error_message(line_num, f'{cors} is not a valid CORS option')
  114. err_msgs.append(err_msg)
  115. return err_msgs
  116. def extract_url(markdown_link: str) -> str:
  117. match = re.search(r'\((http[^)]+)\)', markdown_link)
  118. return match.group(1) if match else ''
  119. def uri_validator(url):
  120. try:
  121. result = urlparse(url)
  122. return all([result.scheme, result.netloc]) or ' '
  123. except ValueError:
  124. return False
  125. def check_calls(line_num: int, calls: str) -> List[str]:
  126. err_msgs = []
  127. if not uri_validator(calls):
  128. err_msg = error_message(line_num, 'Call This API column must contain a valid URL')
  129. err_msgs.append(err_msg)
  130. else:
  131. actual_url = extract_url(calls)
  132. parsed_url = urlparse(actual_url)
  133. if not parsed_url.netloc.endswith('pstmn.io') and not parsed_url.netloc.endswith('postman.com'):
  134. err_msg = error_message(line_num, 'Call This API column URL must be a run in Postman button')
  135. err_msgs.append(err_msg)
  136. return err_msgs
  137. def check_entry(line_num: int, segments: List[str]) -> List[str]:
  138. raw_title = segments[index_title]
  139. description = segments[index_desc]
  140. auth = segments[index_auth]
  141. https = segments[index_https]
  142. cors = segments[index_cors]
  143. title_err_msgs = check_title(line_num, raw_title)
  144. desc_err_msgs = check_description(line_num, description)
  145. auth_err_msgs = check_auth(line_num, auth)
  146. https_err_msgs = check_https(line_num, https)
  147. cors_err_msgs = check_cors(line_num, cors)
  148. err_msgs = [
  149. *title_err_msgs,
  150. *desc_err_msgs,
  151. *auth_err_msgs,
  152. *https_err_msgs,
  153. *cors_err_msgs,
  154. ]
  155. if len(segments) == max_segments:
  156. calls_column = segments[index_call].strip()
  157. if calls_column:
  158. optional_column_err_msgs = check_calls(line_num, calls_column)
  159. err_msgs.extend(optional_column_err_msgs)
  160. return err_msgs
  161. def check_file_format(lines: List[str]) -> List[str]:
  162. err_msgs = []
  163. category_title_in_index = []
  164. alphabetical_err_msgs = check_alphabetical_order(lines)
  165. err_msgs.extend(alphabetical_err_msgs)
  166. num_in_category = min_entries_per_category + 1
  167. category = ''
  168. category_line = 0
  169. # Flag to indicate whether we are in the main content section
  170. in_main_content = False
  171. for line_num, line_content in enumerate(lines):
  172. # Check if the line marks the start of the main content section
  173. if "## Index" in line_content:
  174. in_main_content = True
  175. continue
  176. # Skip lines until we reach the main content section
  177. if not in_main_content:
  178. continue
  179. category_title_match = category_title_in_index_re.match(line_content)
  180. if category_title_match:
  181. category_title_in_index.append(category_title_match.group(1))
  182. # check each category for the minimum number of entries
  183. if line_content.startswith(anchor):
  184. category_match = anchor_re.match(line_content)
  185. if category_match:
  186. if category_match.group(1) not in category_title_in_index:
  187. err_msg = error_message(line_num, f'category header ({category_match.group(1)}) not added to Index section')
  188. err_msgs.append(err_msg)
  189. else:
  190. err_msg = error_message(line_num, 'category header is not formatted correctly')
  191. err_msgs.append(err_msg)
  192. if num_in_category < min_entries_per_category:
  193. err_msg = error_message(category_line, f'{category} category does not have the minimum {min_entries_per_category} entries (only has {num_in_category})')
  194. err_msgs.append(err_msg)
  195. category = line_content.split(' ')[1]
  196. category_line = line_num
  197. num_in_category = 0
  198. continue
  199. # skips lines that we do not care about
  200. if not line_content.startswith('|') or line_content.startswith('|:---'):
  201. continue
  202. num_in_category += 1
  203. segments = line_content.split('|')[1:-1]
  204. if len(segments) < 5 or len(segments) > 6:
  205. err_msg = error_message(line_num, f'entry does not have all the required columns (have {len(segments)}, need {min_segments} to {max_segments})')
  206. err_msgs.append(err_msg)
  207. continue
  208. for segment in segments:
  209. # every line segment should start and end with exactly 1 space
  210. if len(segment) - len(segment.lstrip()) != 1 or len(segment) - len(segment.rstrip()) != 1:
  211. err_msg = error_message(line_num, 'each segment must start and end with exactly 1 space')
  212. err_msgs.append(err_msg)
  213. segments = [segment.strip() for segment in segments]
  214. entry_err_msgs = check_entry(line_num, segments)
  215. err_msgs.extend(entry_err_msgs)
  216. return err_msgs
  217. def main(filename: str) -> None:
  218. with open(filename, mode='r', encoding='utf-8') as file:
  219. lines = list(line.rstrip() for line in file)
  220. file_format_err_msgs = check_file_format(lines)
  221. if file_format_err_msgs:
  222. for err_msg in file_format_err_msgs:
  223. print(err_msg)
  224. sys.exit(1)
  225. if __name__ == '__main__':
  226. num_args = len(sys.argv)
  227. if num_args < 2:
  228. print('No .md file passed (file should contain Markdown table syntax)', flush=True)
  229. sys.exit(1)
  230. filename = sys.argv[1]
  231. main(filename)