From ad81130aa49dc52e5c45f13d4f3b134605ece3e5 Mon Sep 17 00:00:00 2001 From: work Date: Tue, 22 Jul 2025 17:40:18 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=94=B9=E8=B4=A6=E6=88=B7=E4=BF=A1?= =?UTF-8?q?=E6=81=AF=E5=8A=9F=E8=83=BD=E7=9B=B4=E6=8E=A5=E8=BF=9B=E5=85=A5?= =?UTF-8?q?=E9=93=BE=E6=8E=A5(=E7=82=B9=E5=87=BB=E5=8F=AF=E8=83=BD?= =?UTF-8?q?=E4=BC=9A=E6=93=8D=E4=BD=9C=E5=A4=B1=E8=B4=A5),?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- const.py | 2 +- miniofile.py | 55 ++++++++++++++++++++++++++++++++++++++++++++------ spider/task.py | 27 +++++++++++++------------ 3 files changed, 64 insertions(+), 20 deletions(-) diff --git a/const.py b/const.py index 0dc2713..b6f2f08 100644 --- a/const.py +++ b/const.py @@ -16,7 +16,7 @@ ELEMENT = { } # ucloud-us3 -ENDPOINT = "154.19.186.49" +ENDPOINT = "154.19.186.49:80" ACCESS_KEY = '3QcjsqdCYCoy1khuDqia' SECRET_KEY = 'iPXte32y1deoySiDq0RwONUJIiIgvjTVhbfCL8tV' BUCKET = 'facebook' diff --git a/miniofile.py b/miniofile.py index f623992..505a9e3 100644 --- a/miniofile.py +++ b/miniofile.py @@ -1,16 +1,59 @@ from const import ENDPOINT, ACCESS_KEY, SECRET_KEY, BUCKET from minio import Minio +import urllib3 +import time +import logging +# 配置日志 +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# 配置 urllib3 连接池 +http_client = urllib3.PoolManager( + timeout=urllib3.Timeout(connect=10.0, read=30.0), + retries=urllib3.Retry( + total=3, + backoff_factor=0.5, + status_forcelist=[500, 502, 503, 504] + ) +) + +# 创建 MinIO 客户端 client = Minio( endpoint=ENDPOINT, access_key=ACCESS_KEY, secret_key=SECRET_KEY, - secure=False + secure=False, + http_client=http_client ) -def put_object(name, content): - length = len(content.getbuffer()) - content.seek(0) - client.put_object(BUCKET, name, content, length) - +def put_object(name, content, max_retries=3): + """ + 上传对象到 MinIO 服务器,带有重试机制 + + Args: + name: 对象名称 + content: 对象内容 + max_retries: 最大重试次数 + """ + retry_count = 0 
+ while retry_count < max_retries: + try: + length = len(content.getbuffer()) + content.seek(0) + client.put_object(BUCKET, name, content, length) + logger.info(f"成功上传文件: {name}") + return + except Exception as e: + retry_count += 1 + wait_time = 2 ** retry_count # exponential backoff + logger.warning(f"上传失败 (尝试 {retry_count}/{max_retries}): {str(e)}") + logger.warning(f"等待 {wait_time} 秒后重试...") + time.sleep(wait_time) + + # All retries failed: log the error, then raise so the caller sees the failure + logger.error(f"上传文件 {name} 失败,已达到最大重试次数") + # Optionally save to the local filesystem here as a backup + # or implement some other fallback storage + raise TimeoutError(f"上传文件 {name} 失败,已达到最大重试次数") diff --git a/spider/task.py b/spider/task.py index 88f0259..2cdb6be 100644 --- a/spider/task.py +++ b/spider/task.py @@ -19,10 +19,14 @@ from loguru import logger from playwright._impl._page import Page from playwright.sync_api import sync_playwright, Error, TimeoutError +import const from const import BUCKET, BASE_PATH from exceptions import AuthException, OperationFailed from miniofile import client, put_object +# Whether to hide the browser (headless) for login, account check, and getting/updating the account profile +HEADLESS = True + def sleep(a, b=None): if not b: @@ -423,7 +427,7 @@ def playwright_get_user_profile(cookies, username=None): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=True, args=['--start-maximized'], executable_path=path + headless=HEADLESS, args=['--start-maximized'], executable_path=path ) context = browser.new_context(no_viewport=True) context.add_cookies(parse_cookies(cookies)) @@ -474,16 +478,18 @@ def playwright_set_user_profile(cookies, username=None, first_name=None, last_na with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=True, args=['--start-maximized'], executable_path=path + headless=HEADLESS, args=['--start-maximized'], executable_path=path ) context = browser.new_context(no_viewport=True) context.add_cookies(parse_cookies(cookies)) page = context.new_page() check_account_status(page, parse_cookies(cookies)) 
try: - url = 'https://accountscenter.facebook.com/?entry_point=app_settings' + cookies_dict = {i['name']: i['value'] for i in parse_cookies(cookies)} + uid = cookies_dict["c_user"] + + url = f'https://accountscenter.facebook.com/profiles/{uid}' retry_goto(page, url) - page.locator('//div[@role="list"]/div').first.click() if first_name or last_name: if first_name and last_name: @@ -528,7 +534,7 @@ def playwright_check_account_cookies(cookies): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=True, args=['--start-maximized'], executable_path=path + headless=HEADLESS, args=['--start-maximized'], executable_path=path ) context = browser.new_context(no_viewport=True) context.add_cookies(parse_cookies(cookies)) @@ -602,7 +608,7 @@ def playwright_login(username, password, code_2fa=None): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=False, args=['--start-maximized'], executable_path=path + headless=HEADLESS, args=['--start-maximized'], executable_path=path ) random_user_agent = UserAgent().getBrowser("Chrome").get("useragent") logger.info(f"使用ua={random_user_agent}") @@ -716,7 +722,7 @@ def playwright_m_login(username, password, code_2fa=None): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=True, args=['--start-maximized'], executable_path=path + headless=HEADLESS, args=['--start-maximized'], executable_path=path ) # random_user_agent = UserAgent().getBrowser(["Chrome Mobile iOS"]).get("useragent") random_user_agent = "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.41 Mobile Safari/537.36" @@ -909,7 +915,7 @@ if __name__ == '__main__': # cookies = '{"datr": "mm0taNtaPfOxWhpxdzpkVjV0", "sb": "mm0taFuFnO_L1FpzkKDiA4lw", "wd": "1920x953", "locale": "en_US", "c_user": "61575901481649", "fr": 
"0c0y2KyMv8lRJ6NNq.AWe7DLt-TSkoOyn3DhRjhA4ByOITAhfSwaiIw4eQE5ilq4Q4KAY.BoLW2a..AAA.0.0.BoLW3M.AWfHVOhZIAGgDh_3BvPFPi8-YhE", "xs": "29%3ASM0qc4U4Ile_MA%3A2%3A1747807693%3A-1%3A-1", "presence": "C%7B%22t3%22%3A%5B%5D%2C%22utc3%22%3A1747807698911%2C%22v%22%3A1%7D"}' cookies = '{"locale": "en_US", "datr": "PaB4aGZCgstQYUkBHpEVnEe8", "sb": "PaB4aAgR68sRQtATM6v7gEu5", "m_pixel_ratio": "1", "wd": "1920x1080", "test_cookie": "CheckForPermission", "c_user": "100094571602733", "fr": "0g0qqVhuLyyrKSaUv.AWdif7wExy29FD7aMjwFvrQFqoBzz-S7Qbeg8la4QMVeGv43eLg.BoeKA9..AAA.0.0.BoeKBQ.AWdj3k5XKtwF766wY3n-cro4yw8", "xs": "15%3A52m6IVmYaMzM3Q%3A2%3A1752735825%3A-1%3A-1"}' # print(playwright_share(cookies, "https://www.facebook.com/groups/1702958116839437/permalink/2210833932718517/", "")) - # print(playwright_get_user_profile(cookies)) + print(playwright_get_user_profile(cookies)) # # 永久链接的帖子点赞 # print(playwright_like(cookies, "https://www.facebook.com/groups/1070754870427928/permalink/1873461830157224/")) # @@ -972,8 +978,3 @@ if __name__ == '__main__': # 评论 # print(playwright_comment(cookies, "https://www.facebook.com/groups/7423373454348259/permalink/24322822973976709/", # "6")) - - # 转发 - print(playwright_share(cookies, - "https://www.facebook.com/permalink.php?story_fbid=pfbid0xsKBsFVR8qGyRnWPUBkyHyk5tnCTtweaUm59HskuNxqwryw3dPYfbRTmTRt4CoEsl&id=61556406998616", - "也太少了吧"))