# Source file: py_facebook/spider/proxy_valid.py (Python, 182 lines, 15 KiB)

import os
import requests
import time
from typing import Optional, Tuple, Dict, Any
class ProxyChecker:
    """Fetch candidate HTTP proxies from an API and find one that works.

    A proxy counts as working when a GET request routed through it to every
    URL in ``TEST_URLS`` is attempted and at least one returns HTTP 200.
    """

    # Endpoint queried when no ``api_url`` is passed to the constructor.
    DEFAULT_API_URL = 'http://123.58.209.87:8080/S2uDt8u8/proxy'

    # URLs a proxy is probed against; the first HTTP 200 marks it as valid.
    TEST_URLS = ("https://www.facebook.com",)

    # Browser-like User-Agent sent with every probe request.
    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

    def __init__(self, timeout: int = 5, api_url: Optional[str] = None):
        """Initialise the proxy checker.

        Args:
            timeout: Timeout in seconds applied to every HTTP request.
            api_url: Address of the API that returns the proxy list. Falls
                back to ``DEFAULT_API_URL`` when omitted or empty.
        """
        self.timeout = timeout
        self.api_url = api_url or self.DEFAULT_API_URL

    @staticmethod
    def _parse_proxy_entry(entry: Any) -> Optional[Dict[str, Any]]:
        """Convert one ``"ip:port"`` string into ``{"ip": ..., "port": ...}``.

        Returns ``None`` for anything that is not a string of the form
        ``host:integer`` so one bad entry cannot discard the whole list.
        """
        if not isinstance(entry, str):
            return None
        # Split on the LAST colon so the port is always the trailing field.
        host, separator, port_text = entry.strip().rpartition(":")
        if not separator or not host:
            return None
        try:
            port = int(port_text)
        except ValueError:
            return None
        return {"ip": host, "port": port}

    @classmethod
    def _parse_proxy_list(cls, payload: Any) -> list:
        """Extract the proxies from a decoded API payload.

        The payload is expected to look like
        ``{'code': 200, 'msg': 'SUCCESS', 'data': {'list': ['5.78.16.61:24878', ...]}}``.
        A missing or null ``data``/``list`` yields an empty list, and
        malformed entries are skipped.
        """
        if not isinstance(payload, dict):
            return []
        body = payload.get("data")
        entries = body.get("list") if isinstance(body, dict) else None
        if not isinstance(entries, (list, tuple)):
            return []
        parsed = (cls._parse_proxy_entry(entry) for entry in entries)
        return [proxy for proxy in parsed if proxy is not None]

    def get_proxies_from_api(self) -> list:
        """Fetch the proxy list from the API.

        Returns:
            list: Proxies as ``[{"ip": "x.x.x.x", "port": xxxx}, ...]``.
            Empty when the request fails, the body is not valid JSON, or the
            API does not report success.
        """
        # Network errors and JSON errors are handled separately so that the
        # printed message names the step that actually failed.
        try:
            # NOTE(review): verify=False disables TLS certificate checks. It
            # has no effect on the default http:// endpoint but would apply
            # to an https:// api_url - confirm this is intended.
            response = requests.get(url=self.api_url, timeout=self.timeout, verify=False)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"获取代理失败: {e}")
            return []

        try:
            data = response.json()
        except ValueError as e:
            print(f"解析JSON失败: {e}")
            return []

        msg = data.get("msg") if isinstance(data, dict) else None
        if isinstance(data, dict) and data.get("code") == 200 and msg:
            return self._parse_proxy_list(data)

        print(f"API返回错误: {msg}")
        return []

    def check_proxy(self, proxy: Dict[str, Any]) -> bool:
        """Check whether a single proxy is usable.

        Args:
            proxy: Mapping with an ``ip`` key and an optional ``port`` key.
                An empty-string or missing port produces a URL with no port.

        Returns:
            bool: ``True`` if a test URL answered with HTTP 200 through the
            proxy, ``False`` otherwise.
        """
        port = proxy.get("port")
        if port in ("", None):
            proxy_url = f"http://{proxy['ip']}"
        else:
            proxy_url = f"http://{proxy['ip']}:{port}"

        # The same HTTP proxy endpoint is used for both schemes.
        proxies = {
            "http": proxy_url,
            "https": proxy_url,
        }

        for test_url in self.TEST_URLS:
            # Monotonic clock: elapsed time is immune to wall-clock changes.
            start_time = time.monotonic()
            try:
                response = requests.get(
                    test_url,
                    proxies=proxies,
                    timeout=self.timeout,
                    headers={"User-Agent": self.USER_AGENT},
                )
            except requests.exceptions.RequestException:
                # ProxyError, timeouts, etc. all derive from
                # RequestException; a failed probe just moves on.
                continue
            response_time = time.monotonic() - start_time
            if response.status_code == 200:
                print(f"代理 {proxy_url} 有效,响应时间: {response_time:.2f}s测试URL: {test_url}")
                return True
        return False

    def get_valid_proxy(self) -> Optional[Tuple[str, int]]:
        """Fetch the proxy list and return the first proxy that validates.

        When the ``dev`` environment variable is set to a non-empty value,
        the API is skipped and the local proxy ``("127.0.0.1", 1080)`` is
        returned without validation.

        Returns:
            tuple: ``(ip, port)``, or ``None`` if no proxy is valid.
        """
        print("正在从API获取代理列表...")
        # Local development mode.
        if os.getenv("dev"):
            return "127.0.0.1", 1080

        proxies = self.get_proxies_from_api()
        if not proxies:
            print("未获取到代理列表")
            return None

        print(f"获取到 {len(proxies)} 个代理,开始验证...")
        for proxy in proxies:
            print(f"正在验证代理: {proxy['ip']}:{proxy['port']}")
            if self.check_proxy(proxy):
                print(f"找到有效代理: {proxy['ip']}:{proxy['port']}")
                return (proxy['ip'], proxy['port'])

        print("所有代理均无效")
        return None

    def get_valid_proxy_dict(self) -> Optional[Dict[str, str]]:
        """Return a valid proxy as a ``requests``-style proxies mapping.

        Returns:
            dict: ``{"http": "http://ip:port", "https": "http://ip:port"}``,
            or ``None`` if no proxy is valid.
        """
        proxy_url = self.get_valid_proxy_url()
        if proxy_url is None:
            return None
        return {
            "http": proxy_url,
            "https": proxy_url,
        }

    def get_valid_proxy_url(self) -> Optional[str]:
        """Return a valid proxy as a URL string.

        Returns:
            str: ``"http://ip:port"``, or ``None`` if no proxy is valid.
        """
        result = self.get_valid_proxy()
        if not result:
            return None
        ip, port = result
        return f"http://{ip}:{port}"
# Usage example: find a working proxy and send one request through it.
if __name__ == "__main__":
    checker = ProxyChecker(timeout=8)

    # Get the first valid proxy as an (ip, port) tuple.
    valid_proxy = checker.get_valid_proxy()
    if valid_proxy:
        ip, port = valid_proxy
        print(f"\n最终选择的代理: {ip}:{port}")

        # Route a sample request through the chosen proxy; the same HTTP
        # proxy endpoint serves both schemes.
        proxy_url = f"http://{ip}:{port}"
        proxies = {
            "http": proxy_url,
            "https": proxy_url,
        }
        try:
            response = requests.get("http://httpbin.org/ip", proxies=proxies, timeout=10)
            print(f"使用代理请求测试: {response.text}")
        except requests.exceptions.RequestException as e:
            print(f"测试请求失败: {e}")
    else:
        print("未找到有效代理")

    # Alternatively, checker.get_valid_proxy_dict() returns the proxy
    # directly as a requests-style {"http": ..., "https": ...} mapping.