更改为loguru, 增加随机ua, 增加隐式等待

This commit is contained in:
work
2025-05-26 17:08:38 +08:00
parent 24722b4363
commit aa4a627db8
2 changed files with 114 additions and 68 deletions

23
main.py
View File

@@ -1,6 +1,7 @@
from concurrent.futures.thread import ThreadPoolExecutor
from spider.task import * from spider.task import *
from logger import error_logger, record_full_log from loguru import logger
logger.add("./log/logging.log", rotation="50 MB")
TASK_TYPE = { TASK_TYPE = {
'get_account_profile': playwright_get_user_profile, 'get_account_profile': playwright_get_user_profile,
@@ -23,10 +24,10 @@ def get_task():
'Content-Type': 'application/json' 'Content-Type': 'application/json'
} }
data = { data = {
"include_task_type": ["login_account"], "include_task_type": ["login_account", "get_account_profile"],
"exclude_task_type": [] "exclude_task_type": []
} }
response = requests.post(url, headers=header, json=data) response = requests.post(url, headers=header, json=data, proxies=None)
if response.status_code == 200: if response.status_code == 200:
result = response.json() result = response.json()
return result return result
@@ -39,13 +40,15 @@ def task_callback(tid, data, status='success', msg='success'):
'data': data, 'data': data,
'message': msg, 'message': msg,
} }
logger.info(f"回调任务: tid:{tid}, status:{status}, data:{data}, msg:{msg}")
response = requests.post( response = requests.post(
f'{HOST}/queue/handle-data', f'{HOST}/queue/handle-data',
json=body json=body,
proxies=None
) )
result = response.json() result = response.json()
if response.status_code != 200: if response.status_code != 200:
raise RuntimeError(f"任务回调失败:{result['msg']}") raise RuntimeError(f"任务回调失败:code={response.status_code} text={result.text}")
def execute_task(tid, task_type, **kwargs): def execute_task(tid, task_type, **kwargs):
@@ -53,10 +56,10 @@ def execute_task(tid, task_type, **kwargs):
result = TASK_TYPE.get(task_type)(**kwargs) result = TASK_TYPE.get(task_type)(**kwargs)
task_callback(tid, data=result) task_callback(tid, data=result)
except (AuthException, OperationFailed) as e: except (AuthException, OperationFailed) as e:
record_full_log(error_logger, e) logger.exception("账号或操作异常")
task_callback(tid, data={}, status=e.error_type, msg=str(e)) task_callback(tid, data={}, status=e.error_type, msg=str(e))
except Exception as e: except Exception as e:
record_full_log(error_logger, e) logger.exception("未捕获异常")
task_callback(tid, data={}, status='failed', msg=str(e)) task_callback(tid, data={}, status='failed', msg=str(e))
@@ -65,13 +68,15 @@ def main():
try: try:
task = get_task() task = get_task()
if task is None: if task is None:
logger.info("无任务")
time.sleep(10) time.sleep(10)
continue continue
logger.info(f"收到任务{task}")
task['data']['tid'] = task['id'] task['data']['tid'] = task['id']
task['data']['task_type'] = task['task_type'] task['data']['task_type'] = task['task_type']
execute_task(**task['data']) execute_task(**task['data'])
except Exception as e: except Exception as e:
error_logger.error(f'Main Error: {e}') logger.error(f'Main Error: {e}')
time.sleep(10) time.sleep(10)

View File

@@ -10,14 +10,16 @@ import uuid
import pyotp import pyotp
import pywintypes import pywintypes
import requests import requests
from fake_useragent import UserAgent # 导入 UserAgent
import win32api import win32api
import win32con import win32con
from PIL import ImageGrab from PIL import ImageGrab
from loguru import logger
from playwright._impl._page import Page
from playwright.sync_api import sync_playwright, Error, TimeoutError from playwright.sync_api import sync_playwright, Error, TimeoutError
from const import BUCKET, BASE_PATH from const import BUCKET, BASE_PATH
from exceptions import AuthException, OperationFailed from exceptions import AuthException, OperationFailed
from logger import error_logger
from miniofile import client, put_object from miniofile import client, put_object
@@ -737,18 +739,30 @@ def update_windows_distinguish(x=1920, y=1080):
def _change_language(page): def _change_language(page):
# 判断是否为英文
lang = page.locator('html').get_attribute('lang')
if lang == "en":
return
sleep(1, 2) sleep(1, 2)
page.locator('//*[@style="height:40px;width:40px"]').first.click() page.locator('//*[@style="height:40px;width:40px"]').first.click()
sleep(1, 2) sleep(1, 2)
page.click('//div[@role="listitem" and @class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"][1]')
# 点击设置图标
page.wait_for_selector(
'//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -419px")]',
timeout=10000).click()
# 点击语言
page.wait_for_selector('//div[@role="menu"]/div[2]', timeout=10000).click()
page.wait_for_selector(
'//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -793px")]',
timeout=10000).click()
sleep(1, 2) sleep(1, 2)
page.click('//div[@role="menu"]/div[2]') page.click('//span[text()="English (US)"][1]')
sleep(1, 2)
page.click('//div[@class="x1y1aw1k x4uap5 xwxc41k xkhd6sd"]/div/div[2]')
sleep(1, 2)
page.click('//span[@class="x1lliihq x6ikm8r x10wlt62 x1n2onr6 xlyipyv xuxw1ft" and text()="English (US)"][1]')
sleep(3, 5) sleep(3, 5)
page.wait_for_load_state()
def _edit_privacy(page): def _edit_privacy(page):
@@ -785,29 +799,10 @@ def parse_cookies(cookies):
def check_freeze_account(uid): def check_freeze_account(uid):
# 检查是否冻结 # 检查是否冻结
headers = { headers = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
'cache-control': 'max-age=0',
'dpr': '2',
'priority': 'u=0, i',
'referer': 'https://www.facebook.com/',
'sec-ch-prefers-color-scheme': 'light',
'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
'sec-ch-ua-full-version-list': '"Chromium";v="134.0.6998.89", "Not:A-Brand";v="24.0.0.0", "Google Chrome";v="134.0.6998.89"',
'sec-ch-ua-mobile': '?0',
'sec-ch-ua-model': '""',
'sec-ch-ua-platform': '"macOS"',
'sec-ch-ua-platform-version': '"15.3.2"',
'sec-fetch-dest': 'document',
'sec-fetch-mode': 'navigate',
'sec-fetch-site': 'same-origin',
'sec-fetch-user': '?1',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
'viewport-width': '743',
} }
url = f"https://graph.facebook.com/{uid}/picture?type=normal" url = f"https://graph.facebook.com/{uid}/picture?type=normal"
response = requests.get(url, headers=headers, allow_redirects=False) response = requests.get(url, headers=headers, allow_redirects=False, verify=False)
if response.status_code == 302: if response.status_code == 302:
if response.headers.get('Location') == 'https://static.xx.fbcdn.net/rsrc.php/v1/yh/r/C5yt7Cqf3zU.jpg': if response.headers.get('Location') == 'https://static.xx.fbcdn.net/rsrc.php/v1/yh/r/C5yt7Cqf3zU.jpg':
raise AuthException('该账号已被冻结', 'frozen') raise AuthException('该账号已被冻结', 'frozen')
@@ -1169,8 +1164,51 @@ def get_login_continue_btn(page):
return None return None
def retry_goto(page: "Page", url: str, max_retries: int = 3, retry_delay: int = 5):
    """Navigate ``page`` to ``url``, retrying when the navigation times out.

    Only timeouts are retried; any other error raised by ``page.goto`` is
    logged and propagated immediately.

    Args:
        page: The synchronous Playwright Page object.
        url: The URL to navigate to.
        max_retries: Total number of attempts, including the initial one.
            Must be at least 1.
        retry_delay: Delay in seconds between two attempts.

    Raises:
        ValueError: If ``max_retries`` is smaller than 1 (previously this
            returned silently without navigating at all).
        TimeoutError: If every attempt timed out; the last timeout is
            re-raised. This is whichever ``TimeoutError`` is in scope in the
            module (the file imports it from ``playwright.sync_api``).
        Exception: Any non-timeout error from ``page.goto``, re-raised as is.
    """
    if max_retries < 1:
        raise ValueError(f"max_retries must be >= 1, got {max_retries}")

    for attempt in range(1, max_retries + 1):
        if attempt == 1:
            logger.info(f"Navigating to {url}, initial attempt...")
        else:
            logger.info(f"Retrying navigation to {url}, attempt {attempt}/{max_retries}...")

        try:
            # Per-attempt navigation timeout (Playwright takes milliseconds).
            page.goto(url, timeout=30000)
        except TimeoutError:
            logger.warning(f"Navigation to {url} timed out on attempt {attempt}.")
            if attempt == max_retries:
                logger.error(f"All {max_retries} attempts failed for {url}.")
                # Bare raise keeps the original traceback intact.
                raise
            logger.info(f"Waiting {retry_delay} seconds before retrying...")
            time.sleep(retry_delay)
        except Exception as e:
            # Non-timeout failures are not retried: log and propagate.
            logger.error(
                f"An unexpected error occurred during navigation to {url} on attempt {attempt}: {e}"
            )
            raise
        else:
            logger.info(f"Successfully navigated to {url} on attempt {attempt}")
            return
def playwright_login(username, password, code_2fa=None): def playwright_login(username, password, code_2fa=None):
error_logger.info(f"登录账号{username}") logger.info(f"登录账号{username}")
# 检查是否冻结 # 检查是否冻结
check_freeze_account(username) check_freeze_account(username)
@@ -1181,20 +1219,9 @@ def playwright_login(username, password, code_2fa=None):
browser = playwright.chromium.launch( browser = playwright.chromium.launch(
headless=False, args=['--start-maximized'], executable_path=path headless=False, args=['--start-maximized'], executable_path=path
) )
context = browser.new_context(no_viewport=True, random_user_agent = UserAgent().getBrowser("Chrome").get("useragent")
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36') logger.info(f"使用ua={random_user_agent}")
page = context.new_page() context = browser.new_context(no_viewport=True, user_agent=random_user_agent)
url = 'https://www.facebook.com'
page.goto(url)
time.sleep(random.randint(1, 10))
page.locator('//input[@id="email"]').type(username, delay=30)
time.sleep(random.randint(1, 3))
page.locator('//input[@id="pass"]').type(password, delay=30)
time.sleep(random.randint(1, 3))
page.click('//button[@name="login"]')
page.wait_for_load_state()
time.sleep(random.randint(3, 5))
# 设置语言为英文 # 设置语言为英文
context.add_cookies([ context.add_cookies([
{ {
@@ -1207,13 +1234,29 @@ def playwright_login(username, password, code_2fa=None):
"secure": False, "secure": False,
}, },
]) ])
page = context.new_page()
url = 'https://www.facebook.com'
retry_goto(page, url)
page.locator('//input[@id="email"]').type(username, delay=30)
time.sleep(random.randint(1, 3)) time.sleep(random.randint(1, 3))
page.reload() page.locator('//input[@id="pass"]').type(password, delay=30)
time.sleep(random.randint(1, 3)) time.sleep(random.randint(1, 3))
page.click('//button[@name="login"]')
page.wait_for_load_state()
time.sleep(random.randint(3, 5))
arkose_captcha = page.query_selector('#arkose-captcha')
if arkose_captcha:
logger.info(f"账号{username} 弹语音识别验证")
raise OperationFailed("操作失败")
arkose_captcha = page.query_selector('#captcha-recaptcha')
if arkose_captcha:
logger.info(f"账号{username} 弹谷歌验证")
raise OperationFailed("操作失败")
captcha_img = page.query_selector('//img[contains(@src, "captcha")]') captcha_img = page.query_selector('//img[contains(@src, "captcha")]')
if captcha_img: if captcha_img:
error_logger.info(f"账号{username} 需要验证") logger.info(f"账号{username} 需要验证")
data = { data = {
'user': 'ycxxkj', 'user': 'ycxxkj',
'pass2': 'B4DBF06831577C6558F823879061626C', 'pass2': 'B4DBF06831577C6558F823879061626C',
@@ -1229,10 +1272,12 @@ def playwright_login(username, password, code_2fa=None):
page.locator('//img[contains(@src, "captcha")]/parent::div/parent::div/div').nth(4).click() page.locator('//img[contains(@src, "captcha")]/parent::div/parent::div/div').nth(4).click()
else: else:
raise OperationFailed('验证码解析错误') raise OperationFailed('验证码解析错误')
time.sleep(3) # 检查是否还有验证码, 隐式等待60秒
page.wait_for_load_state() page.wait_for_selector(
# 检查是否还有验证码 '//span[@class="x1lliihq x1plvlek xryxfnj x1n2onr6 x1ji0vk5 x18bv5gf x193iq5w xeuugli x1fj9vlw x13faqbe x1vvkbs x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x1qo61fq x81x36d xa4e6wy x1rhavg7 xzsf02u x1yc453h xudqn12 x3x7a5m x1yztbdb"]',
h2 = page.query_selector("//h2/span") timeout=60000)
h2 = page.wait_for_selector(
'//div[@class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"]//h2/span', timeout=60000)
if h2 is None: if h2 is None:
raise OperationFailed('页面有误') raise OperationFailed('页面有误')
else: else:
@@ -1242,7 +1287,7 @@ def playwright_login(username, password, code_2fa=None):
] ]
if not h2.text_content() in text_contexts: if not h2.text_content() in text_contexts:
error_logger.info(f"账号{username} 操作失败") logger.info(f"账号{username} 操作失败")
raise OperationFailed("操作失败") raise OperationFailed("操作失败")
auth_span = page.query_selector('//span[text()="Try Another Way" or text()="Try another way"]') auth_span = page.query_selector('//span[text()="Try Another Way" or text()="Try another way"]')
if auth_span: if auth_span:
@@ -1261,19 +1306,15 @@ def playwright_login(username, password, code_2fa=None):
time.sleep(1) time.sleep(1)
page.locator('//label[text()="Code"]/preceding-sibling::input').fill(auth_code) page.locator('//label[text()="Code"]/preceding-sibling::input').fill(auth_code)
page.click('//span[text()="Continue"]') page.click('//span[text()="Continue"]')
# 这里验证可能会很慢, 硬等 # 等待登录成功页面出来
time.sleep(40) page.wait_for_selector(
save_profile = page.query_selector('//span[text()="Save"]') "xpath=//h2[normalize-space()='Youre logged in. Trust this device?'] or //span[text()='Save']",
if save_profile: timeout=60000)
save_profile.click()
trust_device_select = page.query_selector('''//span[text()="Always confirm that it's me"]''')
if trust_device_select:
trust_device_select.click()
time.sleep(3)
c = {i['name']: i['value'] for i in context.cookies()} c = {i['name']: i['value'] for i in context.cookies()}
if c["c_user"] is None: if c["c_user"] is None:
raise OperationFailed("操作失败") raise OperationFailed("操作失败")
logger.info(f"登录账号{username} 登录成功")
context.close() context.close()
browser.close() browser.close()