You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

174 lines
5.6 KiB

  1. # -*- coding: utf-8 -*-
  2. import unittest
  3. from validate.links import find_links_in_text
  4. from validate.links import check_duplicate_links
  5. from validate.links import fake_user_agent
  6. from validate.links import get_host_from_link
  7. from validate.links import has_cloudflare_protection
  8. class FakeResponse():
  9. def __init__(self, code: int, headers: dict, text: str) -> None:
  10. self.status_code = code
  11. self.headers = headers
  12. self.text = text
  13. class TestValidateLinks(unittest.TestCase):
  14. def setUp(self):
  15. self.duplicate_links = [
  16. 'https://www.example.com',
  17. 'https://www.example.com',
  18. 'https://www.example.com',
  19. 'https://www.anotherexample.com',
  20. # added a comment
  21. ]
  22. self.no_duplicate_links = [
  23. 'https://www.firstexample.com',
  24. 'https://www.secondexample.com',
  25. 'https://www.anotherexample.com',
  26. ]
  27. self.code_200 = 200
  28. self.code_403 = 403
  29. self.code_503 = 503
  30. self.cloudflare_headers = {'Server': 'cloudflare'}
  31. self.no_cloudflare_headers = {'Server': 'google'}
  32. self.text_with_cloudflare_flags = '403 Forbidden Cloudflare We are checking your browser...'
  33. self.text_without_cloudflare_flags = 'Lorem Ipsum'
  34. def test_find_link_in_text(self):
  35. text = """
  36. # this is valid
  37. http://example.com?param1=1&param2=2#anchor
  38. https://www.example.com?param1=1&param2=2#anchor
  39. https://www.example.com.br
  40. https://www.example.com.gov.br
  41. [Example](https://www.example.com?param1=1&param2=2#anchor)
  42. lorem ipsum https://www.example.com?param1=1&param2=2#anchor
  43. https://www.example.com?param1=1&param2=2#anchor lorem ipsum
  44. # this not is valid
  45. example.com
  46. https:example.com
  47. https:/example.com
  48. https//example.com
  49. https//.com
  50. """
  51. links = find_links_in_text(text)
  52. self.assertIsInstance(links, list)
  53. self.assertEqual(len(links), 7)
  54. for link in links:
  55. with self.subTest():
  56. self.assertIsInstance(link, str)
  57. def test_find_link_in_text_with_invalid_argument(self):
  58. with self.assertRaises(TypeError):
  59. find_links_in_text()
  60. find_links_in_text(1)
  61. find_links_in_text(True)
  62. def test_if_check_duplicate_links_has_the_correct_return(self):
  63. result_1 = check_duplicate_links(self.duplicate_links)
  64. result_2 = check_duplicate_links(self.no_duplicate_links)
  65. self.assertIsInstance(result_1, tuple)
  66. self.assertIsInstance(result_2, tuple)
  67. has_duplicate_links, links = result_1
  68. no_duplicate_links, no_links = result_2
  69. self.assertTrue(has_duplicate_links)
  70. self.assertFalse(no_duplicate_links)
  71. self.assertIsInstance(links, list)
  72. self.assertIsInstance(no_links, list)
  73. self.assertEqual(len(links), 2)
  74. self.assertEqual(len(no_links), 0)
  75. def test_if_fake_user_agent_has_a_str_as_return(self):
  76. user_agent = fake_user_agent()
  77. self.assertIsInstance(user_agent, str)
  78. def test_get_host_from_link(self):
  79. links = [
  80. 'example.com',
  81. 'https://example.com',
  82. 'https://www.example.com',
  83. 'https://www.example.com.br',
  84. 'https://www.example.com/route',
  85. 'https://www.example.com?p=1&q=2',
  86. 'https://www.example.com#anchor'
  87. ]
  88. for link in links:
  89. host = get_host_from_link(link)
  90. with self.subTest():
  91. self.assertIsInstance(host, str)
  92. self.assertNotIn('://', host)
  93. self.assertNotIn('/', host)
  94. self.assertNotIn('?', host)
  95. self.assertNotIn('#', host)
  96. with self.assertRaises(TypeError):
  97. get_host_from_link()
  98. def test_has_cloudflare_protection_with_code_403_and_503_in_response(self):
  99. resp_with_cloudflare_protection_code_403 = FakeResponse(
  100. code=self.code_403,
  101. headers=self.cloudflare_headers,
  102. text=self.text_with_cloudflare_flags
  103. )
  104. resp_with_cloudflare_protection_code_503 = FakeResponse(
  105. code=self.code_503,
  106. headers=self.cloudflare_headers,
  107. text=self.text_with_cloudflare_flags
  108. )
  109. result1 = has_cloudflare_protection(resp_with_cloudflare_protection_code_403)
  110. result2 = has_cloudflare_protection(resp_with_cloudflare_protection_code_503)
  111. self.assertTrue(result1)
  112. self.assertTrue(result2)
  113. def test_has_cloudflare_protection_when_there_is_no_protection(self):
  114. resp_without_cloudflare_protection1 = FakeResponse(
  115. code=self.code_200,
  116. headers=self.no_cloudflare_headers,
  117. text=self.text_without_cloudflare_flags
  118. )
  119. resp_without_cloudflare_protection2 = FakeResponse(
  120. code=self.code_403,
  121. headers=self.no_cloudflare_headers,
  122. text=self.text_without_cloudflare_flags
  123. )
  124. resp_without_cloudflare_protection3 = FakeResponse(
  125. code=self.code_503,
  126. headers=self.no_cloudflare_headers,
  127. text=self.text_without_cloudflare_flags
  128. )
  129. result1 = has_cloudflare_protection(resp_without_cloudflare_protection1)
  130. result2 = has_cloudflare_protection(resp_without_cloudflare_protection2)
  131. result3 = has_cloudflare_protection(resp_without_cloudflare_protection3)
  132. self.assertFalse(result1)
  133. self.assertFalse(result2)
  134. self.assertFalse(result3)