From 9305da8bea331b41e3497eae7e3e3fdacaeb6cae Mon Sep 17 00:00:00 2001 From: SwZ Date: Thu, 24 Apr 2025 17:26:11 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E5=88=87=E6=8D=A2=E8=AF=AD?= =?UTF-8?q?=E8=A8=80=E5=8A=9F=E8=83=BDxpath=E4=BC=9A=E5=8F=98=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- spider/task.py | 89 +++++++++++++++++++++++++++----------------------- 1 file changed, 48 insertions(+), 41 deletions(-) diff --git a/spider/task.py b/spider/task.py index d1c6be9..c505008 100644 --- a/spider/task.py +++ b/spider/task.py @@ -703,7 +703,9 @@ from lxml import html # raise OperationFailed(f'参数错误, response: {response.text}') -def sleep(a, b): +def sleep(a, b=None): + if not b: + return time.sleep(a) return time.sleep(round(random.uniform(a, b), 1)) @@ -735,7 +737,7 @@ def update_windows_distinguish(x=1920, y=1080): def _change_language(page): sleep(1, 2) - page.click('//*[@id="«R1ldm6l6ismipapd5aq»"]/*[2]') + page.locator('//*[@style="height:40px;width:40px"]').first.click() sleep(1, 2) page.click('//div[@role="listitem" and @class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"][1]') sleep(1, 2) @@ -778,36 +780,41 @@ def parse_cookies(cookies): return cookies -def check_account_status(page): +def check_account_status(page, cookies): try: _change_language(page) except TimeoutError: - page.set_default_timeout(3000) - - try: - # 账户被暂停 - suspended_span = page.query_selector_all('//span[text()="We suspended your account"]') - if suspended_span: - raise AuthException('该账户被暂停', 'frozen') - except TimeoutError: - pass - - try: - # 被封 - lock_img = page.query_selector_all('//img[@src="/images/checkpoint/epsilon/comet/intro.png"]') - if lock_img: - raise AuthException('该账户已被封禁', 'frozen') - except TimeoutError: - pass - - try: - # 无法登录 无效cookies - login_btn = page.query_selector_all('//button[@name="login"]') - if login_btn: - raise AuthException('该账户登录状态失效', 'invalid') - except TimeoutError: - pass - raise OperationFailed('操作超时或该账户异常,请重试') + headers = { + 'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7', + 'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8', + 'cache-control': 'max-age=0', + 'dpr': '2', + 'priority': 'u=0, i', + 'referer': 'https://www.facebook.com/', + 'sec-ch-prefers-color-scheme': 'light', + 'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"', + 'sec-ch-ua-full-version-list': '"Chromium";v="134.0.6998.89", "Not:A-Brand";v="24.0.0.0", "Google Chrome";v="134.0.6998.89"', + 'sec-ch-ua-mobile': '?0', + 'sec-ch-ua-model': '""', + 'sec-ch-ua-platform': '"macOS"', + 'sec-ch-ua-platform-version': '"15.3.2"', + 'sec-fetch-dest': 'document', + 'sec-fetch-mode': 'navigate', + 'sec-fetch-site': 'same-origin', + 'sec-fetch-user': '?1', + 'upgrade-insecure-requests': '1', + 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36', + 'viewport-width': '743', + } + cookies = {i['name']: i['value'] for i in cookies} + uid = cookies['c_user'] + url = f"https://graph.facebook.com/{uid}/picture?type=normal" + response = requests.get(url, headers=headers, allow_redirects=False) + if response.status_code == 302: + if response.headers.get('Location') == 'https://static.xx.fbcdn.net/rsrc.php/v1/yh/r/C5yt7Cqf3zU.jpg': + raise AuthException('该账号已被冻结', 'frozen') + else: + raise OperationFailed('操作超时或该账户异常,请重试') class RLock(threading._RLock): @@ -835,7 +842,7 @@ def playwright_like(cookies, target_url): page.goto(url) time.sleep(1) page.goto(url) - check_account_status(page) + check_account_status(page, parse_cookies(cookies)) if 'permalink.php?story_fbid' in target_url or '/posts/' in target_url: # 文字或图片类型 @@ -885,7 +892,7 @@ def playwright_post(cookies, content, image_key=None): page.goto(url) time.sleep(1) page.goto(url) - check_account_status(page) + check_account_status(page, parse_cookies(cookies)) time.sleep(5) if image_key: @@ -942,7 +949,7 @@ def playwright_comment(cookies, target_url, content, image_key=None): url = 'https://facebook.com' try: page.goto(url) - check_account_status(page) + check_account_status(page, parse_cookies(cookies)) time.sleep(5) page.goto(target_url) @@ -1019,7 +1026,7 @@ def playwright_get_user_profile(cookies, username=None): page.goto(url) time.sleep(1) page.goto(url) - check_account_status(page) + check_account_status(page, parse_cookies(cookies)) profile_pic_url = page.locator( '//div[@aria-label="Shortcuts"]//li[1]//*[@preserveAspectRatio="xMidYMid slice"]').get_attribute( @@ -1068,7 +1075,7 @@ def playwright_set_user_profile(cookies, username=None, first_name=None, last_na page.goto(url) time.sleep(1) page.goto(url) - check_account_status(page) + check_account_status(page, parse_cookies(cookies)) url = 'https://accountscenter.facebook.com/?entry_point=app_settings' page.goto(url) @@ -1134,7 +1141,7 @@ def playwright_check_account_cookies(cookies): time.sleep(1) page.goto(url) time.sleep(10) - check_account_status(page) + check_account_status(page, parse_cookies(cookies)) context.close() browser.close() return {} @@ -1146,8 +1153,7 @@ def playwright_login(username, password, code_2fa=None): with sync_playwright() as playwright: update_windows_distinguish() browser = playwright.chromium.launch( - headless=False, args=['--start-maximized'], executable_path=path, - proxy={'server': '127.0.0.1:10889'} + headless=False, args=['--start-maximized'], executable_path=path ) context = browser.new_context(no_viewport=True, user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36') context.add_init_script(path=os.path.join(BASE_PATH, 'stealth.min.js')) @@ -1179,7 +1185,8 @@ def playwright_login(username, password, code_2fa=None): else: raise OperationFailed('验证码解析错误') time.sleep(5) - auth_span = page.query_selector('//span[text()="Try Another Way"]') + page.wait_for_load_state() + auth_span = page.query_selector('//span[text()="Try Another Way" or text()="Try another way"]') if auth_span: if not code_2fa: raise OperationFailed('缺少2FA密钥') @@ -1207,7 +1214,6 @@ def playwright_login(username, password, code_2fa=None): if __name__ == '__main__': # cookies = 'sb=mC6pxQuLvNLPTNpF-b9Tk8tK;c_user=61570961343759;xs=18:fBZ4XJkFA69uNg:2:1734940655:-1:-1;fr=0JjwzD0HEedbQSHGt.AWUHNQcfxnkbZ3j5rVd1NgTTGhA.BnaRfv..AAA.0.0.BnaRfv.AWWF3uukqmQ;datr=7xdpZzxiItbht8A5aCDBAhQU' - # cookies = 'datr=q13hZowje6bbViFxECQpYyp8; sb=q13hZgJARsRIDmNJG8xUauAe; m_pixel_ratio=1.875; wd=384x686; c_user=61565823476070; fr=01C6Lt4VArm5hELvx.AWXg75HOo-QNJgbiDl8qFtw_5lc.Bm4V2r..AAA.0.0.Bm4V25.AWWHzUeMTuI; xs=50%3A8luhgQ-Ea0vnhg%3A2%3A1726045627%3A-1%3A-1' # cookies = { # 'datr': '4MXgZ3twsUMLaR7_yYPjboTs', # 'sb': '4MXgZ5AKdd6AiVqGy_N0-cpe', @@ -1242,8 +1248,9 @@ if __name__ == '__main__': # # lastname='Keals', # # image_key='rg.jpg' # ) - # playwright_check_account_cookies(cookies) - # cookies = playwright_login('61575067907301', '@Badhon@20', 'D54WZQP7VCNDKAKC66Q3WNQYTOFJSAZY') + cookies = '{"datr": "kgAKaCUX5_TBUoWScwULOUy1", "sb": "kgAKaHuXanVysgh_ZMUr71bV", "wd": "1920x945", "locale": "en_US", "c_user": "61575057767684", "fr": "0CUBUr4ylkIHja0Mx.AWfMivYkYl7BU22lwt8EAI7pzeF59lvJhmEOseS0dxnxlP8SPg4.BoCgCS..AAA.0.0.BoCgCh.AWcb0fzRzetlWo5Jg5bYyhKUbWA", "xs": "12%3AXMKnbFIc1Is6NA%3A2%3A1745485987%3A-1%3A13612", "presence": "C%7B%22t3%22%3A%5B%5D%2C%22utc3%22%3A1745485990508%2C%22v%22%3A1%7D"}' + playwright_check_account_cookies(cookies) + # cookies = playwright_login('61575057767684', '@Badhon@20', 'AMULNV6YOAMDGOQ72NNBQCBMTOFB4BTJ') # print(cookies) pass