From 818c01b1af3f9907c6fd2a0820fa010859d4880e Mon Sep 17 00:00:00 2001 From: work Date: Tue, 8 Jul 2025 16:55:49 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A4=E6=96=AD=E6=98=AF=E5=90=A6=E8=A2=AB?= =?UTF-8?q?=E6=A3=80=E6=B5=8B=E8=87=AA=E5=8A=A8=E5=8C=96=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E6=96=87=E6=9C=AC=E6=9D=A1=E4=BB=B6,=20=E7=99=BB=E5=BD=95,=20?= =?UTF-8?q?=E8=8E=B7=E5=8F=96=E8=B4=A6=E5=8F=B7=E4=BF=A1=E6=81=AF,=20?= =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=B4=A6=E5=8F=B7=E4=BF=A1=E6=81=AF,=20?= =?UTF-8?q?=E6=A3=80=E6=9F=A5=E8=B4=A6=E5=8F=B7cookies=E4=B8=BA=E6=97=A0?= =?UTF-8?q?=E5=A4=B4=E6=A8=A1=E5=BC=8F=E8=BF=90=E8=A1=8C.=20=E8=8E=B7?= =?UTF-8?q?=E5=8F=96=E4=BB=BB=E5=8A=A1=E6=9B=B4=E6=94=B9=E4=B8=BA2?= =?UTF-8?q?=E4=B8=AA=E5=B7=A5=E4=BD=9C=E7=BA=BF=E7=A8=8B=E9=98=B2=E6=AD=A2?= =?UTF-8?q?=E9=98=9F=E5=88=97=E4=BB=BB=E5=8A=A1=E8=BF=87=E5=A4=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 7 ++++--- spider/task.py | 12 ++++++------ 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/main.py b/main.py index ca8babc..13ff7c8 100644 --- a/main.py +++ b/main.py @@ -79,15 +79,15 @@ def main(): # 存储正在运行的任务 running_tasks = set() - # 创建线程池执行任务(3个工作线程) - with ThreadPoolExecutor(max_workers=3) as executor: + # 创建线程池执行任务(2个工作线程) + with ThreadPoolExecutor(max_workers=2) as executor: while True: try: # 清理已完成的任务 running_tasks = {task for task in running_tasks if not task.done()} # 如果队列未满,尝试获取新任务 - if len(running_tasks) < 3: + if len(running_tasks) < 2: task = get_task() if task: logger.info(f"收到任务: {task['id']} - {task['task_type']}") @@ -102,6 +102,7 @@ def main(): time.sleep(10) else: # 达到最大并发数时等待 + logger.info("等待任务完成") time.sleep(1) except Exception as e: diff --git a/spider/task.py b/spider/task.py index 1297446..71eb5c9 100644 --- a/spider/task.py +++ b/spider/task.py @@ -835,7 +835,7 @@ def check_account_status(page, cookies): raise AuthException('该账户登录状态失效', 'invalid') # 判断是否被检测到自动化,这种情况只需要点击按钮就可以继续 - if page.query_selector('//span[text()="We suspect automated behavior on your account"]') is not None: + if page.query_selector('//span[text()="We suspect automated behaviour on your account" or text()="We suspect automated behavior on your account"]') is not None: page.click('//span[text()="Dismiss"]') time.sleep(3) retry_goto(page, 'https://www.facebook.com') @@ -1055,7 +1055,7 @@ def playwright_get_user_profile(cookies, username=None): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=False, args=['--start-maximized'], executable_path=path + headless=True, args=['--start-maximized'], executable_path=path ) context = browser.new_context(no_viewport=True) context.add_cookies(parse_cookies(cookies)) @@ -1099,11 +1099,11 @@ def playwright_set_user_profile(cookies, username=None, first_name=None, last_na return path = os.path.join(BASE_PATH, 'chrome', '130-0008', 'chrome.exe') - with lock: + with login_semaphore: with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=False, args=['--start-maximized'], executable_path=path + headless=True, args=['--start-maximized'], executable_path=path ) context = browser.new_context(no_viewport=True) context.add_cookies(parse_cookies(cookies)) @@ -1163,7 +1163,7 @@ def playwright_check_account_cookies(cookies): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=False, args=['--start-maximized'], executable_path=path + headless=True, args=['--start-maximized'], executable_path=path ) context = browser.new_context(no_viewport=True) context.add_cookies(parse_cookies(cookies)) @@ -1351,7 +1351,7 @@ def playwright_m_login(username, password, code_2fa=None): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=False, args=['--start-maximized'], executable_path=path + headless=True, args=['--start-maximized'], executable_path=path ) # random_user_agent = UserAgent().getBrowser(["Chrome Mobile iOS"]).get("useragent") random_user_agent = "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Mobile Safari/537.36"