From 29ef1ebc345e1afb64c4a255fa917b68cafbfe17 Mon Sep 17 00:00:00 2001
From: work <work@email.com>
Date: Thu, 2 Apr 2026 13:36:36 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=8F=91=E5=B8=96?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 spider/task.py | 170 +++++++++++++++++++++++++++++++------------------
 1 file changed, 109 insertions(+), 61 deletions(-)

diff --git a/spider/task.py b/spider/task.py
index e7c8756..9729bc3 100644
--- a/spider/task.py
+++ b/spider/task.py
@@ -101,7 +101,7 @@ def _edit_privacy(page):
                 continue_btn.click()
                 sleep(1, 2)
                 page.click('//div[@aria-label="Select audience"]//span[text()="Public"]')
-                page.click('//div[@aria-label="Save"]')
+                page.click('//div[@aria-label="Save privacy audience selection and close dialog"]')
                 sleep(1, 2)
                 return
             else:
@@ -111,7 +111,7 @@ def _edit_privacy(page):
         page.click('//div[contains(@aria-label, "Edit privacy")]')
         sleep(1, 2)
         page.click('//div[@aria-label="Select audience"]//span[text()="Public"]')
-        page.click('//div[@aria-label="Done"]')
+        page.click('//div[@aria-label="Done with privacy audience selection and close dialog"]')
         sleep(1, 2)
     except Error as e:
         logger.error(f"Error editing privacy settings: {e}")
@@ -225,6 +225,17 @@ def is_operation_failed(exception):
     return isinstance(exception, OperationFailed) and "更改语言异常" in str(exception)
 
 
+def is_page_crash_error(exception: Exception) -> bool:
+    """Return True if the lower-cased exception text contains a known crash/closed phrase."""
+    lowered = str(exception).lower()
+    for phrase in ("page crashed", "target crashed",
+                   "target page, context or browser has been closed",
+                   "browser has been closed"):
+        if phrase in lowered:
+            return True
+    return False
+
+
 @retry(
     stop=stop_after_attempt(3),
     wait=wait_fixed(2),
@@ -450,77 +461,110 @@ def retry_get_new_video(page, cookies, post_count):
     raise TimeoutError("未获取到新视频(可能视频上传失败)，已超时")
 
 
-@retry(
-    stop=stop_after_attempt(3),
-    wait=wait_fixed(2),
-    retry=retry_if_exception(is_operation_failed)
-)
 def playwright_post(cookies, content, image_key=None):
+    """Publish a Facebook post via a Playwright-driven browser.
+
+    Each attempt (at most three) launches a fresh browser. Failures before the
+    Post button is clicked are retried; later ones are not, since the post may
+    already be live and a retry would publish it twice.
+    ``image_key`` optionally names a media object to attach (".mp4" = video).
+    Returns a dict with ``response_url`` and ``screenshot_key``; raises
+    ``OperationFailed`` (chained to the last error) when posting fails.
+    """
     path = os.path.join(BASE_PATH, 'chrome', '130-0008', 'chrome.exe')
     with lock:
         with sync_playwright() as playwright:
             update_windows_distinguish()
-            browser = playwright.chromium.launch(
-                headless=False, args=['--start-maximized'], executable_path=path
-            )
-            context = browser.new_context(no_viewport=True)
-            context.add_cookies(parse_cookies(cookies))
-            page = context.new_page()
-            page.evaluate(f'document.body.style.zoom = "{const.DISPLAY_SCALE}"')
+            max_browser_retries = 3
+            last_error = None
 
-            check_account_status(page, parse_cookies(cookies))
-            # 声明默认发布视频数量
-            video_count = 0
+            for browser_attempt in range(max_browser_retries):
+                browser = context = file_path = None
+                post_submitted = False
+                try:
+                    browser = playwright.chromium.launch(
+                        headless=False, args=['--start-maximized'], executable_path=path
+                    )
+                    context = browser.new_context(no_viewport=True)
+                    context.add_cookies(parse_cookies(cookies))
+                    page = context.new_page()
 
-            url = 'https://facebook.com'
-            try:
-                # 先获取视频数量
-                if image_key is not None and ".mp4" in image_key:
-                    video_count = get_post_count(page, cookies)
+                    check_account_status(page, parse_cookies(cookies))
+                    # Default published-video count
+                    video_count = 0
 
-                retry_goto(page, url)
-                time.sleep(random.randint(3, 10))
-                time.sleep(5)
+                    url = 'https://www.facebook.com'
+                    # Fetch the video count first
+                    if image_key is not None and ".mp4" in image_key:
+                        video_count = get_post_count(page, cookies)
 
-                if image_key:
-                    filename = image_key.split('/')[-1]
-                    file_path = os.path.join(BASE_PATH, 'files', filename)
-                    client.fget_object(BUCKET, image_key, file_path)
-
-                    sleep(1, 2)
-                    page.locator('input[accept="image/*,image/heif,image/heic,video/*,video/mp4,video/x-m4v,video/x-matroska,.mkv"]').set_input_files(file_path)
+                    # check_account_status already navigated to the Facebook home page; skip a second goto, which crashed the page process
+                    if "facebook.com" not in page.url:
+                        retry_goto(page, url)
+                    time.sleep(random.randint(3, 10))
                     time.sleep(5)
 
-                if not image_key:
-                    page.click('''//span[contains(text(), "What's on your mind")]''')
-                _edit_privacy(page)
-                # 修改后 (使用 fill)
-                page.fill('//div[contains(@aria-placeholder, "What\'s on your mind")]', content,
-                          timeout=300000)
-                page.click('//div[@aria-label="Post"]', timeout=300000)
-                time.sleep(15)
-                post_index = page.locator('//div[@aria-posinset="1"]//a[@role="link"]').nth(2)
-                post_index.click(timeout=600000)
-                time.sleep(5)
-                page.reload(timeout=180000)
-                post_url = page.url
-                # 视频格式要单独去获取链接
-                if image_key is not None and ".mp4" in image_key:
-                    post_url = retry_get_new_video(page, cookies, video_count)
-                    time.sleep(random.randint(3, 10))
+                    if image_key:
+                        filename = image_key.split('/')[-1]
+                        file_path = os.path.join(BASE_PATH, 'files', filename)
+                        client.fget_object(BUCKET, image_key, file_path)
 
-            except Error as e:
-                raise OperationFailed(f'操作超时，请重试{e}')
+                        sleep(1, 2)
+                        page.locator('input[accept="image/*,image/heif,image/heic,video/*,video/mp4,video/x-m4v,video/x-matroska,.mkv"]').set_input_files(file_path)
+                        time.sleep(5)
 
-            screenshot_content = _full_screenshot()
-            if image_key:
-                os.remove(file_path)
-            context.close()
-            browser.close()
+                    if not image_key:
+                        page.click('''//span[contains(text(), "What's on your mind")]''')
+                    _edit_privacy(page)
+                    page.fill('//div[contains(@aria-placeholder, "What\'s on your mind")]', content, timeout=300000)
+                    page.click('//div[@aria-label="Post"]', timeout=300000)
+                    post_submitted = True  # from here on, a retry would publish a duplicate
+                    time.sleep(15)
+                    post_index = page.locator('//div[@aria-posinset="1"]//a[@role="link"]').nth(2)
+                    post_index.click(timeout=600000)
+                    time.sleep(5)
+                    page.reload(timeout=180000)
+                    post_url = page.url
+                    # Video posts need their link fetched separately
+                    if image_key is not None and ".mp4" in image_key:
+                        post_url = retry_get_new_video(page, cookies, video_count)
+                        time.sleep(random.randint(3, 10))
 
-    key = f'screenshot/{uuid.uuid4()}.png'
-    put_object(key, screenshot_content)
-    return {'response_url': post_url, 'screenshot_key': key}
+                    screenshot_content = _full_screenshot()
+                    key = f'screenshot/{uuid.uuid4()}.png'
+                    put_object(key, screenshot_content)
+                    return {'response_url': post_url, 'screenshot_key': key}
+
+                except (TimeoutError, Error) as e:
+                    last_error = e
+                    if isinstance(e, TimeoutError):
+                        reason = "发布任务超时，尝试重建浏览器重试"
+                    elif is_page_crash_error(e):
+                        reason = "发布任务页面崩溃，尝试重建浏览器重试"
+                    else:
+                        reason = "发布任务 Playwright 异常，尝试重试"
+                    logger.warning(f"{reason}: attempt {browser_attempt + 1}/{max_browser_retries}, error={e}")
+                finally:
+                    if image_key and file_path and os.path.exists(file_path):
+                        os.remove(file_path)
+                    for resource in (context, browser):
+                        if resource is not None:
+                            try:
+                                resource.close()
+                            except Exception:
+                                pass  # best-effort cleanup: a crashed browser may already be gone
+
+                if post_submitted:
+                    # The Post button was already clicked; re-running the flow could post twice.
+                    break
+                if browser_attempt < max_browser_retries - 1:
+                    time.sleep(2)
+
+            if isinstance(last_error, TimeoutError):
+                raise OperationFailed(f'操作超时，请重试: {last_error}') from last_error
+            if isinstance(last_error, Error) and is_page_crash_error(last_error):
+                raise OperationFailed(f'页面崩溃，请重试: {last_error}') from last_error
+            raise OperationFailed(f'操作失败，请重试: {last_error}') from last_error
 
 
 def playwright_comment(cookies, target_url, content, image_key=None):
@@ -772,11 +816,15 @@ def retry_goto(page: "Page", url: str, max_retries: int = 3, retry_delay: int =
                 print(f"All {max_retries} attempts failed for {url}.")
                 # If all retries fail, re-raise the exception
                 raise e
+        except Error as e:
+            if is_page_crash_error(e):
+                logger.error(f"Navigation page crashed for {url} on attempt {attempt + 1}: {e}")
+            else:
+                logger.error(f"Navigation playwright error for {url} on attempt {attempt + 1}: {e}")
+            raise e
         except Exception as e:
             # Catch any other unexpected errors during goto
             print(f"An unexpected error occurred during navigation to {url} on attempt {attempt + 1}: {e}")
-            # Decide if other exceptions should also trigger retries
-            # For now, we'll just re-raise other exceptions immediately
             raise e