更改为loguru, 增加随机ua, 增加隐式等待
This commit is contained in:
159
spider/task.py
159
spider/task.py
@@ -10,14 +10,16 @@ import uuid
|
||||
import pyotp
|
||||
import pywintypes
|
||||
import requests
|
||||
from fake_useragent import UserAgent # 导入 UserAgent
|
||||
import win32api
|
||||
import win32con
|
||||
from PIL import ImageGrab
|
||||
from loguru import logger
|
||||
from playwright._impl._page import Page
|
||||
from playwright.sync_api import sync_playwright, Error, TimeoutError
|
||||
|
||||
from const import BUCKET, BASE_PATH
|
||||
from exceptions import AuthException, OperationFailed
|
||||
from logger import error_logger
|
||||
from miniofile import client, put_object
|
||||
|
||||
|
||||
@@ -737,18 +739,30 @@ def update_windows_distinguish(x=1920, y=1080):
|
||||
|
||||
|
||||
def _change_language(page):
|
||||
# 判断是否为英文
|
||||
lang = page.locator('html').get_attribute('lang')
|
||||
if lang == "en":
|
||||
return
|
||||
|
||||
sleep(1, 2)
|
||||
page.locator('//*[@style="height:40px;width:40px"]').first.click()
|
||||
sleep(1, 2)
|
||||
page.click('//div[@role="listitem" and @class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"][1]')
|
||||
|
||||
# 点击设置图标
|
||||
page.wait_for_selector(
|
||||
'//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -419px")]',
|
||||
timeout=10000).click()
|
||||
|
||||
# 点击语言
|
||||
page.wait_for_selector('//div[@role="menu"]/div[2]', timeout=10000).click()
|
||||
|
||||
page.wait_for_selector(
|
||||
'//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -793px")]',
|
||||
timeout=10000).click()
|
||||
|
||||
sleep(1, 2)
|
||||
page.click('//div[@role="menu"]/div[2]')
|
||||
sleep(1, 2)
|
||||
page.click('//div[@class="x1y1aw1k x4uap5 xwxc41k xkhd6sd"]/div/div[2]')
|
||||
sleep(1, 2)
|
||||
page.click('//span[@class="x1lliihq x6ikm8r x10wlt62 x1n2onr6 xlyipyv xuxw1ft" and text()="English (US)"][1]')
|
||||
page.click('//span[text()="English (US)"][1]')
|
||||
sleep(3, 5)
|
||||
page.wait_for_load_state()
|
||||
|
||||
|
||||
def _edit_privacy(page):
|
||||
@@ -785,29 +799,10 @@ def parse_cookies(cookies):
|
||||
def check_freeze_account(uid):
|
||||
# 检查是否冻结
|
||||
headers = {
|
||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
||||
'cache-control': 'max-age=0',
|
||||
'dpr': '2',
|
||||
'priority': 'u=0, i',
|
||||
'referer': 'https://www.facebook.com/',
|
||||
'sec-ch-prefers-color-scheme': 'light',
|
||||
'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
|
||||
'sec-ch-ua-full-version-list': '"Chromium";v="134.0.6998.89", "Not:A-Brand";v="24.0.0.0", "Google Chrome";v="134.0.6998.89"',
|
||||
'sec-ch-ua-mobile': '?0',
|
||||
'sec-ch-ua-model': '""',
|
||||
'sec-ch-ua-platform': '"macOS"',
|
||||
'sec-ch-ua-platform-version': '"15.3.2"',
|
||||
'sec-fetch-dest': 'document',
|
||||
'sec-fetch-mode': 'navigate',
|
||||
'sec-fetch-site': 'same-origin',
|
||||
'sec-fetch-user': '?1',
|
||||
'upgrade-insecure-requests': '1',
|
||||
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
||||
'viewport-width': '743',
|
||||
}
|
||||
url = f"https://graph.facebook.com/{uid}/picture?type=normal"
|
||||
response = requests.get(url, headers=headers, allow_redirects=False)
|
||||
response = requests.get(url, headers=headers, allow_redirects=False, verify=False)
|
||||
if response.status_code == 302:
|
||||
if response.headers.get('Location') == 'https://static.xx.fbcdn.net/rsrc.php/v1/yh/r/C5yt7Cqf3zU.jpg':
|
||||
raise AuthException('该账号已被冻结', 'frozen')
|
||||
@@ -1169,8 +1164,51 @@ def get_login_continue_btn(page):
|
||||
return None
|
||||
|
||||
|
||||
def retry_goto(page: "Page", url: str, max_retries: int = 3, retry_delay: int = 5,
               timeout: float = 30000):
    """
    Navigate to a URL with synchronous Playwright, retrying on timeout.

    Only navigation timeouts are retried; any other exception raised by
    ``page.goto`` is logged and propagated immediately.

    Args:
        page: The synchronous Playwright Page object.
        url: The URL to navigate to.
        max_retries: Maximum number of attempts (including the initial attempt).
            Values below 1 are treated as 1 so that navigation is always
            attempted at least once instead of silently doing nothing.
        retry_delay: Delay in seconds between attempts.
        timeout: Per-attempt timeout forwarded to ``page.goto`` (milliseconds).

    Raises:
        TimeoutError: If every attempt timed out (the last timeout is re-raised).
        Exception: Any non-timeout error raised by ``page.goto``.
    """
    # Guarantee at least one attempt; range(0) would skip navigation entirely.
    attempts = max(1, max_retries)

    for attempt in range(1, attempts + 1):
        if attempt == 1:
            logger.info(f"Navigating to {url}, initial attempt...")
        else:
            logger.info(f"Retrying navigation to {url}, attempt {attempt}/{attempts}...")

        try:
            page.goto(url, timeout=timeout)
        except TimeoutError:
            # NOTE: TimeoutError here is the name imported at the top of the file
            # (playwright.sync_api), which shadows the builtin of the same name.
            logger.warning(f"Navigation to {url} timed out on attempt {attempt}.")
            if attempt == attempts:
                logger.error(f"All {attempts} attempts failed for {url}.")
                # Bare raise keeps the original traceback intact.
                raise
            logger.info(f"Waiting {retry_delay} seconds before retrying...")
            time.sleep(retry_delay)
        except Exception as e:
            # Non-timeout failures are not retried; surface them right away.
            logger.error(
                f"An unexpected error occurred during navigation to {url} on attempt {attempt}: {e}")
            raise
        else:
            logger.info(f"Successfully navigated to {url} on attempt {attempt}")
            return
|
||||
|
||||
|
||||
def playwright_login(username, password, code_2fa=None):
|
||||
error_logger.info(f"登录账号{username}")
|
||||
logger.info(f"登录账号{username}")
|
||||
# 检查是否冻结
|
||||
check_freeze_account(username)
|
||||
|
||||
@@ -1181,20 +1219,9 @@ def playwright_login(username, password, code_2fa=None):
|
||||
browser = playwright.chromium.launch(
|
||||
headless=False, args=['--start-maximized'], executable_path=path
|
||||
)
|
||||
context = browser.new_context(no_viewport=True,
|
||||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36')
|
||||
page = context.new_page()
|
||||
url = 'https://www.facebook.com'
|
||||
page.goto(url)
|
||||
time.sleep(random.randint(1, 10))
|
||||
page.locator('//input[@id="email"]').type(username, delay=30)
|
||||
time.sleep(random.randint(1, 3))
|
||||
page.locator('//input[@id="pass"]').type(password, delay=30)
|
||||
time.sleep(random.randint(1, 3))
|
||||
page.click('//button[@name="login"]')
|
||||
|
||||
page.wait_for_load_state()
|
||||
time.sleep(random.randint(3, 5))
|
||||
random_user_agent = UserAgent().getBrowser("Chrome").get("useragent")
|
||||
logger.info(f"使用ua={random_user_agent}")
|
||||
context = browser.new_context(no_viewport=True, user_agent=random_user_agent)
|
||||
# 设置语言为英文
|
||||
context.add_cookies([
|
||||
{
|
||||
@@ -1207,13 +1234,29 @@ def playwright_login(username, password, code_2fa=None):
|
||||
"secure": False,
|
||||
},
|
||||
])
|
||||
page = context.new_page()
|
||||
url = 'https://www.facebook.com'
|
||||
retry_goto(page, url)
|
||||
page.locator('//input[@id="email"]').type(username, delay=30)
|
||||
time.sleep(random.randint(1, 3))
|
||||
page.reload()
|
||||
page.locator('//input[@id="pass"]').type(password, delay=30)
|
||||
time.sleep(random.randint(1, 3))
|
||||
page.click('//button[@name="login"]')
|
||||
page.wait_for_load_state()
|
||||
time.sleep(random.randint(3, 5))
|
||||
arkose_captcha = page.query_selector('#arkose-captcha')
|
||||
if arkose_captcha:
|
||||
logger.info(f"账号{username} 弹语音识别验证")
|
||||
raise OperationFailed("操作失败")
|
||||
|
||||
arkose_captcha = page.query_selector('#captcha-recaptcha')
|
||||
if arkose_captcha:
|
||||
logger.info(f"账号{username} 弹谷歌验证")
|
||||
raise OperationFailed("操作失败")
|
||||
|
||||
captcha_img = page.query_selector('//img[contains(@src, "captcha")]')
|
||||
if captcha_img:
|
||||
error_logger.info(f"账号{username} 需要验证")
|
||||
logger.info(f"账号{username} 需要验证")
|
||||
data = {
|
||||
'user': 'ycxxkj',
|
||||
'pass2': 'B4DBF06831577C6558F823879061626C',
|
||||
@@ -1229,10 +1272,12 @@ def playwright_login(username, password, code_2fa=None):
|
||||
page.locator('//img[contains(@src, "captcha")]/parent::div/parent::div/div').nth(4).click()
|
||||
else:
|
||||
raise OperationFailed('验证码解析错误')
|
||||
time.sleep(3)
|
||||
page.wait_for_load_state()
|
||||
# 检查是否还有验证码
|
||||
h2 = page.query_selector("//h2/span")
|
||||
# 检查是否还有验证码, 隐式等待60秒
|
||||
page.wait_for_selector(
|
||||
'//span[@class="x1lliihq x1plvlek xryxfnj x1n2onr6 x1ji0vk5 x18bv5gf x193iq5w xeuugli x1fj9vlw x13faqbe x1vvkbs x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x1qo61fq x81x36d xa4e6wy x1rhavg7 xzsf02u x1yc453h xudqn12 x3x7a5m x1yztbdb"]',
|
||||
timeout=60000)
|
||||
h2 = page.wait_for_selector(
|
||||
'//div[@class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"]//h2/span', timeout=60000)
|
||||
if h2 is None:
|
||||
raise OperationFailed('页面有误')
|
||||
else:
|
||||
@@ -1242,7 +1287,7 @@ def playwright_login(username, password, code_2fa=None):
|
||||
]
|
||||
|
||||
if not h2.text_content() in text_contexts:
|
||||
error_logger.info(f"账号{username} 操作失败")
|
||||
logger.info(f"账号{username} 操作失败")
|
||||
raise OperationFailed("操作失败")
|
||||
auth_span = page.query_selector('//span[text()="Try Another Way" or text()="Try another way"]')
|
||||
if auth_span:
|
||||
@@ -1261,19 +1306,15 @@ def playwright_login(username, password, code_2fa=None):
|
||||
time.sleep(1)
|
||||
page.locator('//label[text()="Code"]/preceding-sibling::input').fill(auth_code)
|
||||
page.click('//span[text()="Continue"]')
|
||||
# 这里验证可能会很慢, 硬等
|
||||
time.sleep(40)
|
||||
save_profile = page.query_selector('//span[text()="Save"]')
|
||||
if save_profile:
|
||||
save_profile.click()
|
||||
trust_device_select = page.query_selector('''//span[text()="Always confirm that it's me"]''')
|
||||
if trust_device_select:
|
||||
trust_device_select.click()
|
||||
# 等待登录成功页面出来
|
||||
page.wait_for_selector(
|
||||
"xpath=//h2[normalize-space()='You’re logged in. Trust this device?'] or //span[text()='Save']",
|
||||
timeout=60000)
|
||||
|
||||
time.sleep(3)
|
||||
c = {i['name']: i['value'] for i in context.cookies()}
|
||||
if c["c_user"] is None:
|
||||
raise OperationFailed("操作失败")
|
||||
logger.info(f"登录账号{username} 登录成功")
|
||||
|
||||
context.close()
|
||||
browser.close()
|
||||
|
||||
Reference in New Issue
Block a user