You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

test_validate_links.py 4.3 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. # -*- coding: utf-8 -*-
  2. import unittest
  3. from validate.links import find_links_in_text
  4. from validate.links import get_host_from_link
  5. from validate.links import has_cloudflare_protection
  6. class FakeResponse():
  7. def __init__(self, code: int, headers: dict, text: str) -> None:
  8. self.status_code = code
  9. self.headers = headers
  10. self.text = text
  11. class TestValidateLinks(unittest.TestCase):
  12. def setUp(self):
  13. self.code_200 = 200
  14. self.code_403 = 403
  15. self.code_503 = 503
  16. self.cloudflare_headers = {'Server': 'cloudflare'}
  17. self.no_cloudflare_headers = {'Server': 'google'}
  18. self.text_with_cloudflare_flags = '403 Forbidden Cloudflare We are checking your browser...'
  19. self.text_without_cloudflare_flags = 'Lorem Ipsum'
  20. def test_find_link_in_text(self):
  21. text = """
  22. # this is valid
  23. http://example.com?param1=1&param2=2#anchor
  24. https://www.example.com?param1=1&param2=2#anchor
  25. https://www.example.com.br
  26. https://www.example.com.gov.br
  27. [Example](https://www.example.com?param1=1&param2=2#anchor)
  28. lorem ipsum https://www.example.com?param1=1&param2=2#anchor
  29. https://www.example.com?param1=1&param2=2#anchor lorem ipsum
  30. # this not is valid
  31. example.com
  32. https:example.com
  33. https:/example.com
  34. https//example.com
  35. https//.com
  36. """
  37. links = find_links_in_text(text)
  38. self.assertIsInstance(links, list)
  39. self.assertEqual(len(links), 7)
  40. for link in links:
  41. with self.subTest():
  42. self.assertIsInstance(link, str)
  43. def test_find_link_in_text_with_invalid_argument(self):
  44. with self.assertRaises(TypeError):
  45. find_links_in_text()
  46. find_links_in_text(1)
  47. find_links_in_text(True)
  48. def test_get_host_from_link(self):
  49. links = [
  50. 'example.com',
  51. 'https://example.com',
  52. 'https://www.example.com',
  53. 'https://www.example.com.br',
  54. 'https://www.example.com/route',
  55. 'https://www.example.com?p=1&q=2',
  56. 'https://www.example.com#anchor'
  57. ]
  58. for link in links:
  59. host = get_host_from_link(link)
  60. with self.subTest():
  61. self.assertIsInstance(host, str)
  62. self.assertNotIn('://', host)
  63. self.assertNotIn('/', host)
  64. self.assertNotIn('?', host)
  65. self.assertNotIn('#', host)
  66. with self.assertRaises(TypeError):
  67. get_host_from_link()
  68. def test_has_cloudflare_protection_with_code_403_and_503_in_response(self):
  69. resp_with_cloudflare_protection_code_403 = FakeResponse(
  70. code=self.code_403,
  71. headers=self.cloudflare_headers,
  72. text=self.text_with_cloudflare_flags
  73. )
  74. resp_with_cloudflare_protection_code_503 = FakeResponse(
  75. code=self.code_503,
  76. headers=self.cloudflare_headers,
  77. text=self.text_with_cloudflare_flags
  78. )
  79. result1 = has_cloudflare_protection(resp_with_cloudflare_protection_code_403)
  80. result2 = has_cloudflare_protection(resp_with_cloudflare_protection_code_503)
  81. self.assertTrue(result1)
  82. self.assertTrue(result2)
  83. def test_has_cloudflare_protection_when_there_is_no_protection(self):
  84. resp_without_cloudflare_protection1 = FakeResponse(
  85. code=self.code_200,
  86. headers=self.no_cloudflare_headers,
  87. text=self.text_without_cloudflare_flags
  88. )
  89. resp_without_cloudflare_protection2 = FakeResponse(
  90. code=self.code_403,
  91. headers=self.no_cloudflare_headers,
  92. text=self.text_without_cloudflare_flags
  93. )
  94. resp_without_cloudflare_protection3 = FakeResponse(
  95. code=self.code_503,
  96. headers=self.no_cloudflare_headers,
  97. text=self.text_without_cloudflare_flags
  98. )
  99. result1 = has_cloudflare_protection(resp_without_cloudflare_protection1)
  100. result2 = has_cloudflare_protection(resp_without_cloudflare_protection2)
  101. result3 = has_cloudflare_protection(resp_without_cloudflare_protection3)
  102. self.assertFalse(result1)
  103. self.assertFalse(result2)
  104. self.assertFalse(result3)