设置cookies中语言为英文, 添加cookies失效的判断, 更改点赞的规则
This commit is contained in:
104
spider/task.py
104
spider/task.py
@@ -102,27 +102,84 @@ def _edit_privacy(page):
|
|||||||
raise OperationFailed("编辑隐私设置失败") from e
|
raise OperationFailed("编辑隐私设置失败") from e
|
||||||
|
|
||||||
|
|
||||||
import json
from typing import Union, List, Dict, Any


def _locale_cookie(domain: str, path: str) -> Dict[str, Any]:
    """Build the cookie that requests the en_US locale for *domain*/*path*."""
    return {'name': "locale", 'value': "en_US", 'domain': domain, 'path': path}


def parse_cookies(cookies: Union[str, dict, list], default_domain: str = '.facebook.com', default_path: str = '/') -> \
        List[Dict[str, Any]]:
    """Normalize cookies given in several formats into a list of cookie dicts.

    The result is what this module passes to ``context.add_cookies``. A
    ``locale=en_US`` cookie is appended whenever the input does not already
    carry a cookie named ``locale``; an existing ``locale`` cookie is kept
    untouched, whatever its value.

    Args:
        cookies: One of
            * a list of cookie dicts (entries are used as-is),
            * a ``{name: value}`` dict (values are converted with ``str``),
            * a string holding either JSON for one of the above or a
              ``Cookie`` header such as ``"a=1; b=2"``.
        default_domain: Domain assigned to every cookie built here.
        default_path: Path assigned to every cookie built here.

    Returns:
        A new list of cookie dicts. The caller's list/dict is never modified
        (the dicts inside an input list are shared, not copied).

    Raises:
        ValueError: If ``cookies`` is not a str, dict or list.
    """
    # Already a cookie list: work on a shallow copy so that the caller's list
    # does not grow as a side effect of adding the locale cookie.
    if isinstance(cookies, list):
        cookie_list = list(cookies)
        if not any(cookie.get('name') == 'locale' for cookie in cookie_list):
            cookie_list.append(_locale_cookie(default_domain, default_path))
        return cookie_list

    # Plain name -> value mapping.
    if isinstance(cookies, dict):
        cookie_list = [
            {'name': k, 'value': str(v), 'domain': default_domain, 'path': default_path}
            for k, v in cookies.items()
        ]
        if 'locale' not in cookies:
            cookie_list.append(_locale_cookie(default_domain, default_path))
        return cookie_list

    if isinstance(cookies, str):
        # Try JSON first. Only the decode itself is guarded, so errors raised
        # while handling the decoded value are not mistaken for "not JSON".
        try:
            parsed = json.loads(cookies)
        except ValueError:  # json.JSONDecodeError is a ValueError subclass
            pass
        else:
            if isinstance(parsed, (list, dict, str)):
                return parse_cookies(parsed, default_domain, default_path)
            # JSON scalars (numbers, true/false/null) are not a cookie
            # container; fall through and read the text as a Cookie header.

        # Cookie header format: "name=value; name2=value2".
        cookie_list = []
        for cookie_str in cookies.split(';'):
            cookie_str = cookie_str.strip()
            if not cookie_str:
                continue
            # Split on the first '=' only: values may contain '=' themselves.
            # A token without '=' becomes a cookie with an empty value.
            key, _, value = cookie_str.partition('=')
            cookie_list.append({
                'name': key.strip(),
                'value': value.strip(),
                'domain': default_domain,
                'path': default_path,
            })

        if not any(cookie['name'] == 'locale' for cookie in cookie_list):
            cookie_list.append(_locale_cookie(default_domain, default_path))
        return cookie_list

    # Unsupported input type.
    raise ValueError(f"Unsupported cookies type: {type(cookies)}")
|
||||||
|
|
||||||
|
|
||||||
def check_freeze_account(uid, max_retries=3, retry_delay=2, timeout=5):
|
def check_freeze_account(uid, max_retries=3, retry_delay=2, timeout=5):
|
||||||
"""
|
"""
|
||||||
@@ -164,6 +221,10 @@ def check_account_status(page, cookies):
|
|||||||
if login_btn:
|
if login_btn:
|
||||||
raise AuthException('该账户登录状态失效', 'invalid')
|
raise AuthException('该账户登录状态失效', 'invalid')
|
||||||
|
|
||||||
|
create_btn = page.query_selector_all('//span[text()="Create new account"]')
|
||||||
|
if create_btn:
|
||||||
|
raise AuthException('该账户登录状态失效', 'invalid')
|
||||||
|
|
||||||
# 判断是否被检测到自动化,这种情况只需要点击按钮就可以继续
|
# 判断是否被检测到自动化,这种情况只需要点击按钮就可以继续
|
||||||
if page.query_selector(
|
if page.query_selector(
|
||||||
'//span[text()="We suspect automated behaviour on your account" or text()="We suspect automated behavior on your account"]') is not None:
|
'//span[text()="We suspect automated behaviour on your account" or text()="We suspect automated behavior on your account"]') is not None:
|
||||||
@@ -209,7 +270,6 @@ def playwright_like(cookies, target_url):
|
|||||||
context = browser.new_context(no_viewport=True)
|
context = browser.new_context(no_viewport=True)
|
||||||
context.add_cookies(parse_cookies(cookies))
|
context.add_cookies(parse_cookies(cookies))
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
|
|
||||||
check_account_status(page, parse_cookies(cookies))
|
check_account_status(page, parse_cookies(cookies))
|
||||||
|
|
||||||
url = 'https://facebook.com'
|
url = 'https://facebook.com'
|
||||||
@@ -219,7 +279,7 @@ def playwright_like(cookies, target_url):
|
|||||||
time.sleep(random.randint(3, 10))
|
time.sleep(random.randint(3, 10))
|
||||||
if 'permalink.php?story_fbid' in target_url or '/posts/' in target_url:
|
if 'permalink.php?story_fbid' in target_url or '/posts/' in target_url:
|
||||||
# 文字或图片类型
|
# 文字或图片类型
|
||||||
button_xpath = '//div[@class="__fb-light-mode x1n2onr6 x1vjfegm"]//span[@data-ad-rendering-role="like_button"]'
|
button_xpath = '//*[@role="dialog"]//span[@data-ad-rendering-role="like_button"]'
|
||||||
elif 'watch/?v' in target_url or '/videos/' in target_url:
|
elif 'watch/?v' in target_url or '/videos/' in target_url:
|
||||||
# 视频类型, 视频类型,
|
# 视频类型, 视频类型,
|
||||||
button_xpath = '//span[@data-ad-rendering-role="like_button"][1]'
|
button_xpath = '//span[@data-ad-rendering-role="like_button"][1]'
|
||||||
@@ -253,7 +313,7 @@ def playwright_like(cookies, target_url):
|
|||||||
screenshot_content = _full_screenshot()
|
screenshot_content = _full_screenshot()
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
key = f'{uuid.uuid4()}.png'
|
key = f'screenshot/{uuid.uuid4()}.png'
|
||||||
put_object(key, screenshot_content)
|
put_object(key, screenshot_content)
|
||||||
return {
|
return {
|
||||||
'response_url': target_url,
|
'response_url': target_url,
|
||||||
|
|||||||
Reference in New Issue
Block a user