|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- # -*- coding: utf-8 -*-
-
- import unittest
-
- from validate.links import find_links_in_text
- from validate.links import get_host_from_link
- from validate.links import has_cloudflare_protection
-
-
class FakeResponse:
    """Minimal stand-in for an HTTP response object used by the tests.

    It only stores the three values handed to the constructor; no network
    access or parsing takes place.

    Attributes:
        status_code: The numeric status passed as ``code``.
        headers: The header mapping passed as ``headers``.
        text: The body text passed as ``text``.
    """

    def __init__(self, code: int, headers: dict, text: str) -> None:
        """Store the given status code, headers and body text.

        Args:
            code: Status code, exposed as ``status_code``.
            headers: Header mapping, exposed unchanged as ``headers``.
            text: Body text, exposed unchanged as ``text``.
        """
        # NOTE: the constructor argument is ``code`` but the attribute is
        # ``status_code``; callers in this file rely on both names.
        self.status_code = code
        self.headers = headers
        self.text = text
-
-
class TestValidateLinks(unittest.TestCase):
    """Unit tests for the helpers imported from ``validate.links``.

    Covers ``find_links_in_text``, ``get_host_from_link`` and
    ``has_cloudflare_protection``. Responses are simulated with
    ``FakeResponse`` so no network access is needed.
    """

    def setUp(self):
        """Prepare status codes, headers and body texts shared by the tests."""
        self.code_200 = 200
        self.code_403 = 403
        self.code_503 = 503

        self.cloudflare_headers = {'Server': 'cloudflare'}
        self.no_cloudflare_headers = {'Server': 'google'}

        self.text_with_cloudflare_flags = '403 Forbidden Cloudflare We are checking your browser...'
        self.text_without_cloudflare_flags = 'Lorem Ipsum'

    def test_find_link_in_text(self):
        """Seven well-formed URLs are found; malformed ones are ignored."""
        # The query strings use a literal '&' separator ("param1=1&param2=2").
        text = """
            # this is valid

            http://example.com?param1=1&param2=2#anchor
            https://www.example.com?param1=1&param2=2#anchor
            https://www.example.com.br
            https://www.example.com.gov.br
            [Example](https://www.example.com?param1=1&param2=2#anchor)
            lorem ipsum https://www.example.com?param1=1&param2=2#anchor
            https://www.example.com?param1=1&param2=2#anchor lorem ipsum

            # this not is valid

            example.com
            https:example.com
            https:/example.com
            https//example.com
            https//.com
        """

        links = find_links_in_text(text)

        self.assertIsInstance(links, list)
        self.assertEqual(len(links), 7)

        for link in links:
            # Label each sub-test so a failure names the offending link.
            with self.subTest(link=link):
                self.assertIsInstance(link, str)

    def test_find_link_in_text_with_invalid_argument(self):
        """A missing or non-string argument raises ``TypeError``."""
        # Each call needs its own assertRaises block: once a call raises,
        # the block is left and any statements after it never execute.
        with self.assertRaises(TypeError):
            find_links_in_text()

        for bad_argument in (1, True):
            with self.subTest(argument=bad_argument):
                with self.assertRaises(TypeError):
                    find_links_in_text(bad_argument)

    def test_get_host_from_link(self):
        """The host is a string without scheme, path, query or fragment."""
        links = [
            'example.com',
            'https://example.com',
            'https://www.example.com',
            'https://www.example.com.br',
            'https://www.example.com/route',
            'https://www.example.com?p=1&q=2',
            'https://www.example.com#anchor',
        ]

        for link in links:
            # Calling the helper inside the sub-test keeps one bad link from
            # aborting the checks for the remaining links.
            with self.subTest(link=link):
                host = get_host_from_link(link)

                self.assertIsInstance(host, str)

                self.assertNotIn('://', host)
                self.assertNotIn('/', host)
                self.assertNotIn('?', host)
                self.assertNotIn('#', host)

        with self.assertRaises(TypeError):
            get_host_from_link()

    def test_has_cloudflare_protection_with_code_403_and_503_in_response(self):
        """403 and 503 responses with Cloudflare header and text are flagged."""
        for code in (self.code_403, self.code_503):
            with self.subTest(code=code):
                response = FakeResponse(
                    code=code,
                    headers=self.cloudflare_headers,
                    text=self.text_with_cloudflare_flags,
                )

                self.assertTrue(has_cloudflare_protection(response))

    def test_has_cloudflare_protection_when_there_is_no_protection(self):
        """Responses without Cloudflare header or text are not flagged."""
        for code in (self.code_200, self.code_403, self.code_503):
            with self.subTest(code=code):
                response = FakeResponse(
                    code=code,
                    headers=self.no_cloudflare_headers,
                    text=self.text_without_cloudflare_flags,
                )

                self.assertFalse(has_cloudflare_protection(response))
|