소스 검색

Implementing the cloudflare protection check

Related:
- #2409
- #2960
pull/3011/head
Matheus Felipe 2 년 전
부모
커밋
8136aa9ed1
No known key found for this signature in database GPG 키 ID: AA785C523274872F
1개의 변경된 파일60개의 추가작업 그리고 1개의 파일을 삭제
  1. +60
    -1
      scripts/validate/links.py

+ 60
- 1
scripts/validate/links.py 파일 보기

@@ -6,6 +6,7 @@ import random
from typing import List, Tuple

import requests
from requests.models import Response


def find_links_in_text(text: str) -> List[str]:
@@ -88,6 +89,63 @@ def get_host_from_link(link: str) -> str:
return host


def has_cloudflare_protection(resp: Response) -> bool:
"""Checks if there is any cloudflare protection in the response.
Cloudflare implements multiple network protections on a given link,
this script tries to detect if any of them exist in the response from request.

Common protections have the following HTTP code as a response:
- 403: When host header is missing or incorrect (and more)
- 503: When DDOS protection exists
See more about it at:
- https://support.cloudflare.com/hc/en-us/articles/115003014512-4xx-Client-Error
- https://support.cloudflare.com/hc/en-us/articles/115003011431-Troubleshooting-Cloudflare-5XX-errors
- https://www.cloudflare.com/ddos/
- https://superuser.com/a/888526

Discussions in issues and pull requests:
- https://github.com/public-apis/public-apis/pull/2409
- https://github.com/public-apis/public-apis/issues/2960
"""

code = resp.status_code
server = resp.headers.get('Server') or resp.headers.get('server')
cloudflare_flags = [
'403 Forbidden',
'cloudflare',
'Cloudflare',
'Security check',
'Please Wait... | Cloudflare',
'We are checking your browser...',
'Please stand by, while we are checking your browser...',
'Checking your browser before accessing',
'This process is automatic.',
'Your browser will redirect to your requested content shortly.',
'Please allow up to 5 seconds',
'DDoS protection by',
'Ray ID:',
'Cloudflare Ray ID:',
'_cf_chl',
'_cf_chl_opt',
'__cf_chl_rt_tk',
'cf-spinner-please-wait',
'cf-spinner-redirecting'
]

if code in [403, 503] and server == 'cloudflare':
html = resp.text

flags_found = [flag in html for flag in cloudflare_flags]
any_flag_found = any(flags_found)

if any_flag_found:
return True

return False


def check_if_link_is_working(link: str) -> Tuple[bool, str]:
"""Checks if a link is working.

@@ -109,7 +167,8 @@ def check_if_link_is_working(link: str) -> Tuple[bool, str]:
})

code = resp.status_code
if code >= 400:

if code >= 400 and not has_cloudflare_protection(resp):
has_error = True
error_message = f'ERR:CLT: {code} : {link}'



불러오는 중...
취소
저장