更改登录为本地校验获取代理
This commit is contained in:
177
spider/proxy_valid.py
Normal file
177
spider/proxy_valid.py
Normal file
@@ -0,0 +1,177 @@
|
||||
import os
|
||||
|
||||
import requests
|
||||
import time
|
||||
from typing import Optional, Tuple, Dict, Any
|
||||
|
||||
|
||||
class ProxyChecker:
    """Fetch candidate HTTP proxies from a provider API and pick a working one.

    Candidates are downloaded from ``api_url`` and probed one at a time by
    requesting every URL in ``TEST_URLS`` through the proxy; the first
    candidate that answers with HTTP 200 is returned.

    When the ``dev`` environment variable is set to a non-empty value, no
    network access happens and the local proxy ``127.0.0.1:1080`` is
    returned instead.
    """

    # Provider endpoint used when the caller does not supply one.
    # NOTE(review): the query parameters presumably request 100 US proxies as
    # JSON -- confirm against the provider's API documentation.
    DEFAULT_API_URL = (
        "http://api.proxy.ip2world.com/getProxyIp"
        "?num=100&regions=us&lb=1&return_type=json&protocol=http"
    )

    # A proxy counts as valid once any of these URLs returns HTTP 200 through it.
    TEST_URLS = ("https://www.facebook.com",)

    # Browser-like User-Agent sent with every probe request.
    USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"

    def __init__(self, timeout: int = 5, api_url: Optional[str] = None):
        """Initialise the checker.

        Args:
            timeout: Timeout in seconds applied to the API request and to
                every probe request.
            api_url: Address of the proxy-list API. Defaults to
                ``DEFAULT_API_URL`` when omitted or empty.
        """
        self.api_url = api_url or self.DEFAULT_API_URL
        self.timeout = timeout

    @staticmethod
    def _build_proxy_url(proxy: Any) -> Optional[str]:
        """Return ``http://ip:port`` for *proxy*, or None if the entry is malformed."""
        try:
            return f"http://{proxy['ip']}:{proxy['port']}"
        except (KeyError, TypeError):
            # Missing "ip"/"port" key, or the entry is not a mapping at all.
            return None

    def get_proxies_from_api(self) -> list:
        """Download the proxy list from the API.

        Returns:
            The list found under the ``"data"`` key of the JSON response when
            the response reports ``code == 0`` and a truthy ``success``;
            otherwise an empty list. Network errors, HTTP error statuses and
            invalid JSON are reported on stdout and also yield an empty list.
            Entries are expected to look like ``{"ip": "x.x.x.x", "port": xxxx}``.
        """
        try:
            response = requests.get(self.api_url, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()
        except requests.exceptions.RequestException as e:
            print(f"获取代理失败: {e}")
            return []
        except ValueError as e:
            print(f"解析JSON失败: {e}")
            return []

        # A JSON array/scalar has no .get(); treat it as an API error instead
        # of crashing with AttributeError.
        if not isinstance(data, dict):
            print(f"API返回错误: {data!r}")
            return []

        if data.get("code") == 0 and data.get("success"):
            proxies = data.get("data")
            # "data": null (or any non-list) must not leak out of a method
            # annotated as returning a list.
            return proxies if isinstance(proxies, list) else []

        print(f"API返回错误: {data.get('msg')}")
        return []

    def check_proxy(self, proxy: Dict[str, Any]) -> bool:
        """Check whether a single proxy can reach any of the test URLs.

        Args:
            proxy: Mapping with ``"ip"`` and ``"port"`` keys.

        Returns:
            True as soon as one test URL answers with HTTP 200 through the
            proxy; False if every attempt fails or the entry is malformed.
        """
        proxy_url = self._build_proxy_url(proxy)
        if proxy_url is None:
            return False

        proxies = {"http": proxy_url, "https": proxy_url}
        headers = {"User-Agent": self.USER_AGENT}

        for test_url in self.TEST_URLS:
            # Monotonic clock: the elapsed time cannot be skewed by wall-clock
            # adjustments.
            start_time = time.monotonic()
            try:
                response = requests.get(
                    test_url,
                    proxies=proxies,
                    timeout=self.timeout,
                    headers=headers,
                )
            except requests.exceptions.RequestException:
                # Covers ProxyError, timeouts, connection errors, ...; try the
                # next test URL.
                continue
            response_time = time.monotonic() - start_time

            if response.status_code == 200:
                print(f"代理 {proxy_url} 有效,响应时间: {response_time:.2f}s,测试URL: {test_url}")
                return True

        return False

    def get_valid_proxy(self) -> Optional[Tuple[str, int]]:
        """Fetch the proxy list and return the first proxy that passes the check.

        Returns:
            ``(ip, port)`` taken verbatim from the API entry, or None when the
            API returned nothing usable or no proxy passed the check. With the
            ``dev`` environment variable set, ``("127.0.0.1", 1080)`` is
            returned without contacting the API.
        """
        print("正在从API获取代理列表...")
        # Local development mode: use the local proxy, skip all network checks.
        if os.getenv("dev"):
            return "127.0.0.1", 1080

        proxies = self.get_proxies_from_api()
        if not proxies:
            print("未获取到代理列表")
            return None

        print(f"获取到 {len(proxies)} 个代理,开始验证...")

        for proxy in proxies:
            # Skip malformed entries rather than aborting the whole scan.
            if self._build_proxy_url(proxy) is None:
                print(f"跳过格式错误的代理: {proxy!r}")
                continue

            address = f"{proxy['ip']}:{proxy['port']}"
            print(f"正在验证代理: {address}")
            if self.check_proxy(proxy):
                print(f"找到有效代理: {address}")
                return (proxy['ip'], proxy['port'])

        print("所有代理均无效")
        return None

    def get_valid_proxy_dict(self) -> Optional[Dict[str, str]]:
        """Return a valid proxy as a ``requests``-style proxies mapping.

        Returns:
            ``{"http": "http://ip:port", "https": "http://ip:port"}``, or None
            when no valid proxy was found.
        """
        proxy_url = self.get_valid_proxy_url()
        if proxy_url is None:
            return None
        return {"http": proxy_url, "https": proxy_url}

    def get_valid_proxy_url(self) -> Optional[str]:
        """Return a valid proxy as a URL string.

        Returns:
            ``"http://ip:port"``, or None when no valid proxy was found.
        """
        result = self.get_valid_proxy()
        if result is None:
            return None
        ip, port = result
        return f"http://{ip}:{port}"
|
||||
|
||||
|
||||
# Usage example
if __name__ == "__main__":
    # Placeholder for your API address; replace it with the real one.
    # It is not referenced by the rest of this example.
    API_URL = "你的API地址"

    checker = ProxyChecker(timeout=8)

    # Ask for a working proxy in (ip, port) tuple form.
    valid_proxy = checker.get_valid_proxy()

    if not valid_proxy:
        print("未找到有效代理")
    else:
        ip, port = valid_proxy
        print(f"\n最终选择的代理: {ip}:{port}")

        # Example of using the proxy: both schemes go through the same endpoint.
        proxies = dict.fromkeys(("http", "https"), f"http://{ip}:{port}")

        try:
            response = requests.get("http://httpbin.org/ip", proxies=proxies, timeout=10)
            print(f"使用代理请求测试: {response.text}")
        except Exception as e:
            print(f"测试请求失败: {e}")

    # Alternative: checker.get_valid_proxy_dict() returns the proxy directly
    # as a proxies mapping (or None when no valid proxy is found).
|
||||
@@ -24,6 +24,7 @@ import const
|
||||
from const import BUCKET, BASE_PATH
|
||||
from exceptions import AuthException, OperationFailed
|
||||
from miniofile import client, put_object
|
||||
from spider.proxy_valid import ProxyChecker
|
||||
|
||||
|
||||
def sleep(a, b=None):
|
||||
@@ -852,7 +853,9 @@ def playwright_m_login(username, password, code_2fa=None):
|
||||
with lock:
|
||||
with sync_playwright() as playwright:
|
||||
update_windows_distinguish()
|
||||
proxy_url = get_proxy_from_api()
|
||||
proxy_url = ProxyChecker(timeout=8).get_valid_proxy_url()
|
||||
if proxy_url is None:
|
||||
raise OperationFailed("获取代理失败")
|
||||
logger.info(f"使用proxi={proxy_url}")
|
||||
|
||||
browser = playwright.chromium.launch(
|
||||
|
||||
Reference in New Issue
Block a user