diff --git a/spider/task.py b/spider/task.py index 3375080..e358f63 100644 --- a/spider/task.py +++ b/spider/task.py @@ -1,5 +1,6 @@ import io import json +import logging import os import random import sys @@ -350,10 +351,34 @@ def get_post_count(page, cookies): videos_url = f"https://www.facebook.com/profile.php?id={uid}" # 替换为实际的个人主页URL retry_goto(page, videos_url) # 移动页面最底部 - page.evaluate("window.scrollTo(0, document.body.scrollHeight)") - page.wait_for_timeout(random.randint(3, 5) * 1000) + # 获取初始高度 + previous_height = page.evaluate("document.body.scrollHeight") + logger.info(f"初始高度: {previous_height}") + + while True: + # 滚动到底部 + page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + + # 等待内容加载 + page.wait_for_timeout(3000) + + # 获取新高度 + new_height = page.evaluate("document.body.scrollHeight") + logger.info(f"新高度: {new_height}") + + # 如果高度没有变化,说明已经到底部 + if new_height == previous_height: + logger.info("已经滚动到底部") + break + + previous_height = new_height + + # 安全限制,防止无限循环 + if previous_height > 10000: # 假设最大高度 + logger.info("达到最大滚动限制") + break # 这里还是获取视频 - posts = page.query_selector_all('//div[@aria-label="Video player"]') + posts = page.query_selector_all('//div[@aria-posinset]') # 返回数量 logger.info(f"账号{uid} 获取到帖子数量为{len(posts)}") return len(posts) @@ -383,14 +408,25 @@ def retry_get_new_video(page, cookies, post_count): if new_post_count > post_count: try: # 尝试点击视频 - comment_buttons = page.query_selector_all( - '//div[@aria-label="Leave a comment"]') + comment_buttons = page.query_selector_all('//div[@aria-posinset]') if comment_buttons: # 使用js去点击第一个评论按钮 - element = page.query_selector_all('//div[@aria-label="Leave a comment"]')[0] - page.evaluate('(element) => element.click()', element) - time.sleep(random.randint(3, 5)) - page.reload() + element = page.query_selector_all('//div[@aria-posinset]')[0] + # 获取元素位置 + bounding_box = element.bounding_box() + + if bounding_box: + # 滚动到特定位置 + page.evaluate( + f''' () => {{ window.scrollTo({{ top: {bounding_box['y']} - window.innerHeight / 2, left: {bounding_box['x']} - window.innerWidth / 2, behavior: 'smooth' }}); }} ''') + + # 等待滚动完成 + page.wait_for_timeout(1000) + + # 点击 + element.click() + + page.reload(timeout=180000) time.sleep(random.randint(3, 5)) return page.url except Exception as e: @@ -452,13 +488,14 @@ def playwright_post(cookies, content, image_key=None): if not image_key: page.click('''//span[contains(text(), "What's on your mind")]''') _edit_privacy(page) - page.type('''//div[contains(@aria-placeholder, "What's on your mind")]''', content, delay=50, timeout=300000) + page.type('''//div[contains(@aria-placeholder, "What's on your mind")]''', content, delay=50, + timeout=300000) page.click('//div[@aria-label="Post"]', timeout=300000) time.sleep(15) post_index = page.locator('//div[@aria-posinset="1"]//a[@role="link"]').nth(2) post_index.click(timeout=600000) time.sleep(5) - page.reload() + page.reload(timeout=180000) post_url = page.url # 视频格式要单独去获取链接 if image_key is not None and ".mp4" in image_key: @@ -1137,7 +1174,8 @@ def playwright_share(cookies, target_url, content): if __name__ == '__main__': - cookies = {"c_user":"61587089832795","datr":"aN5-aZxlIs_oAlMTY-3CMwOc","oo":"v13:1769922152","xs":"15:n1l7-jQRvQ2mnA:2:1769922167:-1:-1"} + cookies = {"c_user": "61587089832795", "datr": "aN5-aZxlIs_oAlMTY-3CMwOc", "oo": "v13:1769922152", + "xs": "15:n1l7-jQRvQ2mnA:2:1769922167:-1:-1"} # print(playwright_set_user_profile(cookies, "61584735094876", "Inaaya", "Inaaya", # "facebook/user_upload/0196f098-851c-7810-b2aa-0833a0a7b09d/8c428558-2d90-4f45-baa3-a25f8a654b5c.png")) # cookies = '{"locale": "en_US", "datr": "ZnGnaBBx0yN7pov19-8_A6Gr", "sb": "ZnGnaDQicDSsVuevkudqio1J", "m_pixel_ratio": "1", "wd": "1920x1080", "test_cookie": "CheckForPermission", "c_user": "61579364283503", "xs": "34%3AdWeZoaWzFrtdVQ%3A2%3A1755804022%3A-1%3A-1", "oo": "v1%7C3%3A1755804031"}'