diff --git a/main.py b/main.py index c4d253c..f60669b 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ -from concurrent.futures.thread import ThreadPoolExecutor from spider.task import * -from logger import error_logger, record_full_log +from loguru import logger + +logger.add("./log/logging.log", rotation="50 MB") TASK_TYPE = { 'get_account_profile': playwright_get_user_profile, @@ -23,10 +24,10 @@ def get_task(): 'Content-Type': 'application/json' } data = { - "include_task_type": ["login_account"], + "include_task_type": ["login_account", "get_account_profile"], "exclude_task_type": [] } - response = requests.post(url, headers=header, json=data) + response = requests.post(url, headers=header, json=data, proxies=None) if response.status_code == 200: result = response.json() return result @@ -39,13 +40,15 @@ def task_callback(tid, data, status='success', msg='success'): 'data': data, 'message': msg, } + logger.info(f"回调任务: tid:{tid}, status:{status}, data:{data}, msg:{msg}") response = requests.post( f'{HOST}/queue/handle-data', - json=body + json=body, + proxies=None ) result = response.json() if response.status_code != 200: - raise RuntimeError(f"任务回调失败:{result['msg']}") + raise RuntimeError(f"任务回调失败:code={response.status_code} text={result.text}") def execute_task(tid, task_type, **kwargs): @@ -53,10 +56,10 @@ def execute_task(tid, task_type, **kwargs): result = TASK_TYPE.get(task_type)(**kwargs) task_callback(tid, data=result) except (AuthException, OperationFailed) as e: - record_full_log(error_logger, e) + logger.exception("账号或操作异常") task_callback(tid, data={}, status=e.error_type, msg=str(e)) except Exception as e: - record_full_log(error_logger, e) + logger.exception("未捕获异常") task_callback(tid, data={}, status='failed', msg=str(e)) @@ -65,13 +68,15 @@ def main(): try: task = get_task() if task is None: + logger.info("无任务") time.sleep(10) continue + logger.info(f"收到任务{task}") task['data']['tid'] = task['id'] task['data']['task_type'] = task['task_type'] execute_task(**task['data']) except Exception as e: - error_logger.error(f'Main Error: {e}') + logger.error(f'Main Error: {e}') time.sleep(10) diff --git a/spider/task.py b/spider/task.py index 3844a5f..0638e3b 100644 --- a/spider/task.py +++ b/spider/task.py @@ -10,14 +10,16 @@ import uuid import pyotp import pywintypes import requests +from fake_useragent import UserAgent # 导入 UserAgent import win32api import win32con from PIL import ImageGrab +from loguru import logger +from playwright._impl._page import Page from playwright.sync_api import sync_playwright, Error, TimeoutError from const import BUCKET, BASE_PATH from exceptions import AuthException, OperationFailed -from logger import error_logger from miniofile import client, put_object @@ -737,18 +739,30 @@ def update_windows_distinguish(x=1920, y=1080): def _change_language(page): + # 判断是否为英文 + lang = page.locator('html').get_attribute('lang') + if lang == "en": + return + sleep(1, 2) page.locator('//*[@style="height:40px;width:40px"]').first.click() sleep(1, 2) - page.click('//div[@role="listitem" and @class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"][1]') + + # 点击设置图标 + page.wait_for_selector( + '//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -419px")]', + timeout=10000).click() + + # 点击语言 + page.wait_for_selector('//div[@role="menu"]/div[2]', timeout=10000).click() + + page.wait_for_selector( + '//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -793px")]', + timeout=10000).click() + sleep(1, 2) - page.click('//div[@role="menu"]/div[2]') - sleep(1, 2) - page.click('//div[@class="x1y1aw1k x4uap5 xwxc41k xkhd6sd"]/div/div[2]') - sleep(1, 2) - page.click('//span[@class="x1lliihq x6ikm8r x10wlt62 x1n2onr6 xlyipyv xuxw1ft" and text()="English (US)"][1]') + page.click('//span[text()="English (US)"][1]') sleep(3, 5) - page.wait_for_load_state() def _edit_privacy(page): @@ -785,29 +799,10 @@ def parse_cookies(cookies): def check_freeze_account(uid): # 检查是否冻结 headers = { - 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', - 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8', - 'cache-control': 'max-age=0', - 'dpr': '2', - 'priority': 'u=0, i', - 'referer': 'https://www.facebook.com/', - 'sec-ch-prefers-color-scheme': 'light', - 'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"', - 'sec-ch-ua-full-version-list': '"Chromium";v="134.0.6998.89", "Not:A-Brand";v="24.0.0.0", "Google Chrome";v="134.0.6998.89"', - 'sec-ch-ua-mobile': '?0', - 'sec-ch-ua-model': '""', - 'sec-ch-ua-platform': '"macOS"', - 'sec-ch-ua-platform-version': '"15.3.2"', - 'sec-fetch-dest': 'document', - 'sec-fetch-mode': 'navigate', - 'sec-fetch-site': 'same-origin', - 'sec-fetch-user': '?1', - 'upgrade-insecure-requests': '1', 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', - 'viewport-width': '743', } url = f"https://graph.facebook.com/{uid}/picture?type=normal" - response = requests.get(url, headers=headers, allow_redirects=False) + response = requests.get(url, headers=headers, allow_redirects=False, verify=False) if response.status_code == 302: if response.headers.get('Location') == 'https://static.xx.fbcdn.net/rsrc.php/v1/yh/r/C5yt7Cqf3zU.jpg': raise AuthException('该账号已被冻结', 'frozen') @@ -1169,8 +1164,51 @@ def get_login_continue_btn(page): return None +def retry_goto(page: "Page", url: str, max_retries: int = 3, retry_delay: int = 5): + """ + Attempts to navigate to a URL with retries on timeout using synchronous Playwright. + + Args: + page: The synchronous Playwright Page object. + url: The URL to navigate to. + max_retries: Maximum number of retry attempts (including the initial attempt). + retry_delay: Delay in seconds between retries. + """ + # ... potentially other code before goto ... + + for attempt in range(max_retries): + try: + if attempt > 0: + logger.info(f"Retrying navigation to {url}, attempt {attempt + 1}/{max_retries}...") + else: + logger.info(f"Navigating to {url}, initial attempt...") + + # Make the synchronous goto call within the try block + # Use wait_until='load' as in your traceback, or adjust if needed + page.goto(url, timeout=30000) # Use the synchronous call + + print(f"Successfully navigated to {url} on attempt {attempt + 1}") + break # Navigation was successful, exit the retry loop + + except TimeoutError as e: + print(f"Navigation to {url} timed out on attempt {attempt + 1}.") + if attempt < max_retries - 1: + print(f"Waiting {retry_delay} seconds before retrying...") + time.sleep(retry_delay) # Use synchronous sleep + else: + print(f"All {max_retries} attempts failed for {url}.") + # If all retries fail, re-raise the exception + raise e + except Exception as e: + # Catch any other unexpected errors during goto + print(f"An unexpected error occurred during navigation to {url} on attempt {attempt + 1}: {e}") + # Decide if other exceptions should also trigger retries + # For now, we'll just re-raise other exceptions immediately + raise e + + def playwright_login(username, password, code_2fa=None): - error_logger.info(f"登录账号{username}") + logger.info(f"登录账号{username}") # 检查是否冻结 check_freeze_account(username) @@ -1181,20 +1219,9 @@ def playwright_login(username, password, code_2fa=None): browser = playwright.chromium.launch( headless=False, args=['--start-maximized'], executable_path=path ) - context = browser.new_context(no_viewport=True, - user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36') - page = context.new_page() - url = 'https://www.facebook.com' - page.goto(url) - time.sleep(random.randint(1, 10)) - page.locator('//input[@id="email"]').type(username, delay=30) - time.sleep(random.randint(1, 3)) - page.locator('//input[@id="pass"]').type(password, delay=30) - time.sleep(random.randint(1, 3)) - page.click('//button[@name="login"]') - - page.wait_for_load_state() - time.sleep(random.randint(3, 5)) + random_user_agent = UserAgent().getBrowser("Chrome").get("useragent") + logger.info(f"使用ua={random_user_agent}") + context = browser.new_context(no_viewport=True, user_agent=random_user_agent) # 设置语言为英文 context.add_cookies([ { @@ -1207,13 +1234,29 @@ def playwright_login(username, password, code_2fa=None): "secure": False, }, ]) + page = context.new_page() + url = 'https://www.facebook.com' + retry_goto(page, url) + page.locator('//input[@id="email"]').type(username, delay=30) time.sleep(random.randint(1, 3)) - page.reload() + page.locator('//input[@id="pass"]').type(password, delay=30) time.sleep(random.randint(1, 3)) + page.click('//button[@name="login"]') + page.wait_for_load_state() + time.sleep(random.randint(3, 5)) + arkose_captcha = page.query_selector('#arkose-captcha') + if arkose_captcha: + logger.info(f"账号{username} 弹语音识别验证") + raise OperationFailed("操作失败") + + arkose_captcha = page.query_selector('#captcha-recaptcha') + if arkose_captcha: + logger.info(f"账号{username} 弹谷歌验证") + raise OperationFailed("操作失败") captcha_img = page.query_selector('//img[contains(@src, "captcha")]') if captcha_img: - error_logger.info(f"账号{username} 需要验证") + logger.info(f"账号{username} 需要验证") data = { 'user': 'ycxxkj', 'pass2': 'B4DBF06831577C6558F823879061626C', @@ -1229,10 +1272,12 @@ def playwright_login(username, password, code_2fa=None): page.locator('//img[contains(@src, "captcha")]/parent::div/parent::div/div').nth(4).click() else: raise OperationFailed('验证码解析错误') - time.sleep(3) - page.wait_for_load_state() - # 检查是否还有验证码 - h2 = page.query_selector("//h2/span") + # 检查是否还有验证码, 隐式等待60秒 + page.wait_for_selector( + '//span[@class="x1lliihq x1plvlek xryxfnj x1n2onr6 x1ji0vk5 x18bv5gf x193iq5w xeuugli x1fj9vlw x13faqbe x1vvkbs x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x1qo61fq x81x36d xa4e6wy x1rhavg7 xzsf02u x1yc453h xudqn12 x3x7a5m x1yztbdb"]', + timeout=60000) + h2 = page.wait_for_selector( + '//div[@class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"]//h2/span', timeout=60000) if h2 is None: raise OperationFailed('页面有误') else: @@ -1242,7 +1287,7 @@ def playwright_login(username, password, code_2fa=None): ] if not h2.text_content() in text_contexts: - error_logger.info(f"账号{username} 操作失败") + logger.info(f"账号{username} 操作失败") raise OperationFailed("操作失败") auth_span = page.query_selector('//span[text()="Try Another Way" or text()="Try another way"]') if auth_span: @@ -1261,19 +1306,15 @@ def playwright_login(username, password, code_2fa=None): time.sleep(1) page.locator('//label[text()="Code"]/preceding-sibling::input').fill(auth_code) page.click('//span[text()="Continue"]') - # 这里验证可能会很慢, 硬等 - time.sleep(40) - save_profile = page.query_selector('//span[text()="Save"]') - if save_profile: - save_profile.click() - trust_device_select = page.query_selector('''//span[text()="Always confirm that it's me"]''') - if trust_device_select: - trust_device_select.click() + # 等待登录成功页面出来 + page.wait_for_selector( + "xpath=//h2[normalize-space()='You’re logged in. Trust this device?'] or //span[text()='Save']", + timeout=60000) - time.sleep(3) c = {i['name']: i['value'] for i in context.cookies()} if c["c_user"] is None: raise OperationFailed("操作失败") + logger.info(f"登录账号{username} 登录成功") context.close() browser.close()