diff --git a/spider/task.py b/spider/task.py index 18d01dc..74425f9 100644 --- a/spider/task.py +++ b/spider/task.py @@ -236,48 +236,66 @@ def playwright_like(cookies, target_url): # 获取个个人主页视频数量 -def get_video_count(page, cookies): +def get_post_count(page, cookies): # 进入个人主页视频页面获取最新视频链接 cookies = {i['name']: i['value'] for i in parse_cookies(cookies)} uid = cookies['c_user'] - videos_url = f"https://www.facebook.com/profile.php?id={uid}&sk=videos" # 替换为实际的个人主页URL + videos_url = f"https://www.facebook.com/profile.php?id={uid}" # 替换为实际的个人主页URL retry_goto(page, videos_url) - videos = page.query_selector_all('//img[@alt="View photo"]') - return len(videos) + # 移动页面最底部 + page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + page.wait_for_timeout(random.randint(3, 5) * 1000) + # 这里还是获取视频 + posts = page.query_selector_all('//a[@aria-label="Enlarge"]') + # 返回数量 + logger.info(f"账号{uid} 获取到帖子数量为{len(posts)}") + return len(posts) -def retry_get_new_video(page, cookies, video_count): +def retry_get_new_video(page, cookies, post_count): """ - 每分钟重试一次,5分钟内没获取到新视频则抛出Timeout异常 + 每30秒重试一次,5分钟内没获取到新视频则抛出Timeout异常 Args: page: Playwright页面对象 cookies: Cookies - video_count: 初始视频数量 + post_count: 初始帖子数量 Returns: 函数执行结果或抛出Timeout(如果超时) """ max_duration = 5 * 60 # 5分钟(秒) + retry_interval = 30 # 30秒重试一次 start_time = time.time() attempt = 1 while time.time() - start_time < max_duration: - # 获取当前视频数量 - new_videO_count = get_video_count(page, cookies) - # 如果新视频数量大于初始视频数量,则表示有新视频上传 - if new_videO_count > video_count: - page.query_selector_all('//img[@alt="View photo"]')[0].hover() - page.query_selector_all('//img[@alt="View photo"]')[0].click() - page.reload() - time.sleep(random.randint(3, 5)) - return page.url + # 获取当前帖子数量 + new_post_count = get_post_count(page, cookies) + # 如果新帖子数量大于初始帖子数量,则表示有新帖子上传 + if new_post_count > post_count: + try: + # 尝试点击评论按钮 + comment_buttons = page.query_selector_all( + '//a[@aria-label="Enlarge"]') + if comment_buttons: + comment_buttons[0].hover() + comment_buttons[0].click() + time.sleep(random.randint(3, 5)) + page.reload() + time.sleep(random.randint(3, 5)) + return page.url + except Exception as e: + raise OperationFailed(f"点击评论按钮时出错: {e}") # 计算下一次重试时间 elapsed = time.time() - start_time - if elapsed < max_duration: - sleep_time = 60 - (elapsed % 60) # 确保每分钟执行一次 - print(f"等待 {sleep_time:.1f} 秒后重试...") + remaining_time = max_duration - elapsed + + if remaining_time > 0: + # 等待30秒或剩余时间(取较小值) + sleep_time = min(retry_interval, remaining_time) + print(f"第{attempt}次尝试,等待 {sleep_time:.1f} 秒后重试... (剩余时间: {remaining_time:.1f}秒)") time.sleep(sleep_time) attempt += 1 @@ -307,7 +325,7 @@ def playwright_post(cookies, content, image_key=None): try: # 先获取视频数量 if ".mp4" in image_key: - video_count = get_video_count(page, cookies) + video_count = get_post_count(page, cookies) retry_goto(page, url) time.sleep(random.randint(3, 10)) @@ -884,7 +902,7 @@ def playwright_share(cookies, target_url, content): if __name__ == '__main__': - # cookies = 'sb=mC6pxQuLvNLPTNpF-b9Tk8tK;c_user=61570961343759;xs=18:fBZ4XJkFA69uNg:2:1734940655:-1:-1;fr=0JjwzD0HEedbQSHGt.AWUHNQcfxnkbZ3j5rVd1NgTTGhA.BnaRfv..AAA.0.0.BnaRfv.AWWF3uukqmQ;datr=7xdpZzxiItbht8A5aCDBAhQU' + cookies = 'sb=mC6pxQuLvNLPTNpF-b9Tk8tK;c_user=61570961343759;xs=18:fBZ4XJkFA69uNg:2:1734940655:-1:-1;fr=0JjwzD0HEedbQSHGt.AWUHNQcfxnkbZ3j5rVd1NgTTGhA.BnaRfv..AAA.0.0.BnaRfv.AWWF3uukqmQ;datr=7xdpZzxiItbht8A5aCDBAhQU' # cookies = {"c_user":"61565823476070","datr":"q13hZowje6bbViFxECQpYyp8","fr":"01C6Lt4VArm5hELvx.AWXg75HOo-QNJgbiDl8qFtw_5lc.Bm4V2r..AAA.0.0.Bm4V25.AWWHzUeMTuI","m_pixel_ratio":"1.875","sb":"q13hZgJARsRIDmNJG8xUauAe","wd":"384x686","xs":"50%3A8luhgQ-Ea0vnhg%3A2%3A1726045627%3A-1%3A-1"} # cookies = {"locale": "en_US", "datr": "yDw1aAdAMlMi5KGob7DNfBGF", "sb": "yDw1aGVdzkvrkFcP7az4Ptar", # "m_pixel_ratio": "1", "wd": "1920x1080", "test_cookie": "CheckForPermission", "c_user": "61576501144993", @@ -895,7 +913,7 @@ if __name__ == '__main__': # comment(cookies, 'ZmVlZGJhY2s6MTIyMTA5NjE0NjU0NzkzNzc5', 'game la', 'xzpq.mp4') # playwright_like(cookies, 'https://www.facebook.com/watch/?v=1007800324567828') # print(playwright_post(cookie, '2025-3-230~like')) - # playwright_post(cookies, '2025-3-26~like', 'rg.jpg') + playwright_post(cookies, '2025-3-26~like', 'rg.jpg') # playwright_comment( # cookies, # 'https://www.facebook.com/permalink.php?story_fbid=122096663738814448&id=61574433449058', @@ -916,9 +934,9 @@ if __name__ == '__main__': # ) # cookies = '{"c_user":"61565405263653","datr":"-YDhZoLWu5zbUIw5cOB2In9s","fr":"0ZmsqLWbmV0Onlspt.AWW1JRfVxQAF-jl0oGY7lBQLYq4.Bm4YD5..AAA.0.0.Bm4YED.AWVf1ae03r4","m_page_voice":"61565405263653","m_pixel_ratio":"2.625","sb":"-YDhZs8LozUoyLe1gj2MCUwW","wd":"412x759","xs":"21%3A8Gt3CwtjVWJUhQ%3A2%3A1726054660%3A-1%3A-1"}' # cookies = '{"datr": "mm0taNtaPfOxWhpxdzpkVjV0", "sb": "mm0taFuFnO_L1FpzkKDiA4lw", "wd": "1920x953", "locale": "en_US", "c_user": "61575901481649", "fr": "0c0y2KyMv8lRJ6NNq.AWe7DLt-TSkoOyn3DhRjhA4ByOITAhfSwaiIw4eQE5ilq4Q4KAY.BoLW2a..AAA.0.0.BoLW3M.AWfHVOhZIAGgDh_3BvPFPi8-YhE", "xs": "29%3ASM0qc4U4Ile_MA%3A2%3A1747807693%3A-1%3A-1", "presence": "C%7B%22t3%22%3A%5B%5D%2C%22utc3%22%3A1747807698911%2C%22v%22%3A1%7D"}' - cookies = '{"locale": "en_US", "datr": "PaB4aGZCgstQYUkBHpEVnEe8", "sb": "PaB4aAgR68sRQtATM6v7gEu5", "m_pixel_ratio": "1", "wd": "1920x1080", "test_cookie": "CheckForPermission", "c_user": "100094571602733", "fr": "0g0qqVhuLyyrKSaUv.AWdif7wExy29FD7aMjwFvrQFqoBzz-S7Qbeg8la4QMVeGv43eLg.BoeKA9..AAA.0.0.BoeKBQ.AWdj3k5XKtwF766wY3n-cro4yw8", "xs": "15%3A52m6IVmYaMzM3Q%3A2%3A1752735825%3A-1%3A-1"}' + # cookies = '{"locale": "en_US", "datr": "PaB4aGZCgstQYUkBHpEVnEe8", "sb": "PaB4aAgR68sRQtATM6v7gEu5", "m_pixel_ratio": "1", "wd": "1920x1080", "test_cookie": "CheckForPermission", "c_user": "100094571602733", "fr": "0g0qqVhuLyyrKSaUv.AWdif7wExy29FD7aMjwFvrQFqoBzz-S7Qbeg8la4QMVeGv43eLg.BoeKA9..AAA.0.0.BoeKBQ.AWdj3k5XKtwF766wY3n-cro4yw8", "xs": "15%3A52m6IVmYaMzM3Q%3A2%3A1752735825%3A-1%3A-1"}' # print(playwright_share(cookies, "https://www.facebook.com/groups/1702958116839437/permalink/2210833932718517/", "")) - print(playwright_get_user_profile(cookies)) + # print(playwright_get_user_profile(cookies)) # # 永久链接的帖子点赞 # print(playwright_like(cookies, "https://www.facebook.com/groups/1070754870427928/permalink/1873461830157224/")) #