# NOTE: these imports sit next to the function because that is where the
# patch introduced them; they belong in the module's top-level import block.
import json
from typing import Any, Dict, List, Union


def parse_cookies(cookies: Union[str, dict, list], default_domain: str = '.facebook.com', default_path: str = '/') -> \
        List[Dict[str, Any]]:
    """Normalize cookies given in several formats into a list of cookie dicts.

    Each generated entry has the keys ``name``, ``value``, ``domain`` and
    ``path``. A ``locale=en_US`` cookie is appended when the input does not
    already contain a cookie named ``locale``.

    Args:
        cookies: One of
            * a list of cookie dicts (entries are passed through unchanged),
            * a ``{name: value}`` dict (values are converted with ``str``),
            * a string holding either JSON (object / array / JSON-encoded
              string) or a ``"k1=v1; k2=v2"`` cookie-header style string.
        default_domain: Domain assigned to every cookie this function builds.
        default_path: Path assigned to every cookie this function builds.

    Returns:
        A new list of cookie dicts. The input object is never mutated.

    Raises:
        ValueError: If ``cookies`` is not a ``str``, ``dict`` or ``list``.
    """

    def _make(name: str, value: str) -> Dict[str, Any]:
        # Single place that defines the shape of a generated cookie.
        return {'name': name, 'value': value, 'domain': default_domain, 'path': default_path}

    if isinstance(cookies, str):
        # Keep the try body minimal: only the JSON decoding may fail here.
        # json.JSONDecodeError is a subclass of ValueError.
        try:
            parsed = json.loads(cookies)
        except ValueError:
            parsed = None
        if isinstance(parsed, (dict, list, str)):
            # Structured JSON (or a JSON-encoded string): handle the decoded value.
            return parse_cookies(parsed, default_domain, default_path)

        # Not JSON, or a bare JSON scalar such as "12345": treat the raw text
        # as a "k=v; k2=v2" cookie string.
        cookie_list = []
        for segment in cookies.split(';'):
            # partition() splits on the first '=' only, so values may contain '='.
            name, _, value = segment.strip().partition('=')
            name = name.strip()
            if not name:
                # Skip empty segments and malformed "=value" pieces.
                continue
            cookie_list.append(_make(name, value.strip()))
    elif isinstance(cookies, dict):
        cookie_list = [_make(k, str(v)) for k, v in cookies.items()]
    elif isinstance(cookies, list):
        # Shallow copy so the locale cookie is not appended to the caller's list.
        cookie_list = list(cookies)
    else:
        raise ValueError(f"Unsupported cookies type: {type(cookies)}")

    if not any(cookie.get('name') == 'locale' for cookie in cookie_list):
        cookie_list.append(_make('locale', 'en_US'))
    return cookie_list
'//span[@data-ad-rendering-role="like_button"][1]' @@ -253,7 +313,7 @@ def playwright_like(cookies, target_url): screenshot_content = _full_screenshot() context.close() browser.close() - key = f'{uuid.uuid4()}.png' + key = f'screenshot/{uuid.uuid4()}.png' put_object(key, screenshot_content) return { 'response_url': target_url,