Compare commits
10 Commits
9ab12866d3
...
3682f73ae7
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3682f73ae7 | ||
|
|
ac4a6c394b | ||
|
|
97c877f27c | ||
|
|
d4b704b63b | ||
|
|
5111e8288f | ||
|
|
7bbaf8499d | ||
|
|
37f71e3a16 | ||
|
|
29ef1ebc34 | ||
|
|
48b0c6c53d | ||
|
|
0a95b3bf4b |
204
spider/task.py
204
spider/task.py
@@ -101,7 +101,7 @@ def _edit_privacy(page):
|
||||
continue_btn.click()
|
||||
sleep(1, 2)
|
||||
page.click('//div[@aria-label="Select audience"]//span[text()="Public"]')
|
||||
page.click('//div[@aria-label="Save"]')
|
||||
page.click('//div[@aria-label="Save privacy audience selection and close dialog"]')
|
||||
sleep(1, 2)
|
||||
return
|
||||
else:
|
||||
@@ -111,7 +111,7 @@ def _edit_privacy(page):
|
||||
page.click('//div[contains(@aria-label, "Edit privacy")]')
|
||||
sleep(1, 2)
|
||||
page.click('//div[@aria-label="Select audience"]//span[text()="Public"]')
|
||||
page.click('//div[@aria-label="Done"]')
|
||||
page.click('//div[@aria-label="Done with privacy audience selection and close dialog"]')
|
||||
sleep(1, 2)
|
||||
except Error as e:
|
||||
logger.error(f"Error editing privacy settings: {e}")
|
||||
@@ -225,6 +225,17 @@ def is_operation_failed(exception):
|
||||
return isinstance(exception, OperationFailed) and "更改语言异常" in str(exception)
|
||||
|
||||
|
||||
def is_page_crash_error(exception: Exception) -> bool:
    """Return True when the exception message indicates a crashed or closed page/browser.

    The check is a case-insensitive substring match of ``str(exception)``
    against a fixed set of known messages.

    Args:
        exception: The exception whose message is inspected.

    Returns:
        True if any known crash/closed pattern occurs in the message,
        otherwise False.
    """
    error_message = str(exception).lower()
    # Patterns are written in lowercase because the message is lowercased
    # before matching.
    crash_patterns = (
        "page crashed",
        "target crashed",
        "target page, context or browser has been closed",
        "browser has been closed",
    )
    return any(pattern in error_message for pattern in crash_patterns)
|
||||
|
||||
|
||||
@retry(
|
||||
stop=stop_after_attempt(3),
|
||||
wait=wait_fixed(2),
|
||||
@@ -259,7 +270,8 @@ def check_account_status(page, cookies):
|
||||
|
||||
# 判断是否需要运行cookies页面
|
||||
if "flow=user_cookie_choice_v2&source=pft_user_cookie_choice" in page.url:
|
||||
allow_cookies = page.query_selector('//div[@role="dialog"]/div/div/div/div/div[3]/div/div/div[1]/div[1]/div/div')
|
||||
allow_cookies = page.query_selector(
|
||||
'//div[@role="dialog"]/div/div/div/div/div[3]/div/div/div[1]/div[1]/div/div')
|
||||
if allow_cookies is None:
|
||||
raise OperationFailed("允许cookies设置点击失败")
|
||||
allow_cookies.click()
|
||||
@@ -283,28 +295,58 @@ lock = RLock()
|
||||
login_semaphore = threading.Semaphore(2)
|
||||
|
||||
|
||||
def is_post_liked_in_dialog(page):
    """Report whether the post shown in the dialog is already liked.

    Looks inside the ``[role="dialog"]`` element for a "Remove Like" button
    (already liked) and, failing that, for a "Like" button (not yet liked).
    The like count is read from a ``span[dir="auto"]`` under the button's
    parent element and printed.

    Args:
        page: Object exposing ``query_selector`` (presumably a Playwright
            page -- confirm against callers).

    Returns:
        ``True`` when a "Remove Like" button is found; the tuple
        ``(False, like_count)`` when only a "Like" button is found; ``False``
        when neither button is found.

    NOTE(review): the return shapes are inconsistent, and the non-empty tuple
    ``(False, like_count)`` is truthy. A caller that branches on
    ``if is_post_liked_in_dialog(page):`` therefore only distinguishes
    "a button was found" from "no button was found". The shapes are kept
    unchanged here so existing callers keep their current behaviour.
    """
    # Check for the "already liked" button first.
    remove_like_button = page.query_selector('[role="dialog"] div[aria-label="Remove Like"]')
    if remove_like_button:
        like_count = _dialog_like_count(remove_like_button, "未知")
        print(f"帖子已点赞,点赞数:{like_count}")
        return True

    # Otherwise check for the "not yet liked" button.
    like_button = page.query_selector('[role="dialog"] div[aria-label="Like"]')
    if like_button:
        like_count = _dialog_like_count(like_button, "0")
        print(f"帖子未点赞,点赞数:{like_count}")
        return False, like_count

    print("未找到点赞按钮")
    return False


def _dialog_like_count(button, default):
    """Return the like-count text next to ``button``, or ``default`` if absent."""
    parent = button.query_selector('..')
    # Guard the parent lookup: chaining directly on a None result would raise
    # AttributeError instead of falling back to the default.
    count_elem = parent.query_selector('span[dir="auto"]') if parent else None
    return count_elem.inner_text() if count_elem else default
|
||||
|
||||
def playwright_like(cookies, target_url):
|
||||
path = os.path.join(BASE_PATH, 'chrome', '130-0008', 'chrome.exe')
|
||||
with lock:
|
||||
with sync_playwright() as playwright:
|
||||
update_windows_distinguish()
|
||||
username = 'moremore_51WM1'
|
||||
password = 'TOv5y0nXCZH_JH+5'
|
||||
country = 'US'
|
||||
|
||||
browser = playwright.chromium.launch(
|
||||
headless=False, args=['--start-maximized'], executable_path=path
|
||||
headless=False, args=['--start-maximized'], executable_path=path,
|
||||
proxy={
|
||||
"server": "http://pr.oxylabs.io:7777", # 必填
|
||||
"username": f"customer-{username}-cc-{country}",
|
||||
"password": password
|
||||
}
|
||||
)
|
||||
|
||||
context = browser.new_context(no_viewport=True)
|
||||
context.add_cookies(parse_cookies(cookies))
|
||||
page = context.new_page()
|
||||
check_account_status(page, parse_cookies(cookies))
|
||||
|
||||
url = 'https://facebook.com'
|
||||
try:
|
||||
|
||||
retry_goto(page, url)
|
||||
time.sleep(random.randint(3, 10))
|
||||
if 'permalink.php?story_fbid' in target_url or '/permalink/' in target_url or '/posts/' in target_url:
|
||||
# 文字或图片类型
|
||||
button_xpath = '//*[@role="dialog"]//span[text()="Like" or @data-ad-rendering-role="like_button"]'
|
||||
button_xpath = '//*[@role="dialog"]//div[text()="Like" or @data-ad-rendering-role="like_button"]'
|
||||
elif 'watch/?v' in target_url or '/videos/' in target_url:
|
||||
# 视频类型, 视频类型,
|
||||
button_xpath = '//span[@data-ad-rendering-role="like_button"][1]'
|
||||
@@ -323,9 +365,9 @@ def playwright_like(cookies, target_url):
|
||||
# 滚动到按钮所在处
|
||||
button.scroll_into_view_if_needed()
|
||||
# 判断按钮是否已经点过赞
|
||||
if button.get_attribute('style') == "" or button.get_attribute('style') is None:
|
||||
if is_post_liked_in_dialog(page):
|
||||
# 未点过赞进行点赞操作
|
||||
button.click(force=True)
|
||||
button.evaluate("element => element.click()")
|
||||
else:
|
||||
raise OperationFailed("未找到点赞按钮")
|
||||
time.sleep(10)
|
||||
@@ -450,34 +492,37 @@ def retry_get_new_video(page, cookies, post_count):
|
||||
raise TimeoutError("未获取到新视频(可能视频上传失败),已超时")
|
||||
|
||||
|
||||
@retry(
|
||||
stop=stop_after_attempt(3),
|
||||
wait=wait_fixed(2),
|
||||
retry=retry_if_exception(is_operation_failed)
|
||||
)
|
||||
def playwright_post(cookies, content, image_key=None):
|
||||
path = os.path.join(BASE_PATH, 'chrome', '130-0008', 'chrome.exe')
|
||||
with lock:
|
||||
with sync_playwright() as playwright:
|
||||
update_windows_distinguish()
|
||||
max_browser_retries = 3
|
||||
last_error = None
|
||||
|
||||
for browser_attempt in range(max_browser_retries):
|
||||
browser = None
|
||||
context = None
|
||||
file_path = None
|
||||
try:
|
||||
browser = playwright.chromium.launch(
|
||||
headless=False, args=['--start-maximized'], executable_path=path
|
||||
)
|
||||
context = browser.new_context(no_viewport=True)
|
||||
context.add_cookies(parse_cookies(cookies))
|
||||
page = context.new_page()
|
||||
page.evaluate(f'document.body.style.zoom = "{const.DISPLAY_SCALE}"')
|
||||
|
||||
check_account_status(page, parse_cookies(cookies))
|
||||
# 声明默认发布视频数量
|
||||
video_count = 0
|
||||
|
||||
url = 'https://facebook.com'
|
||||
try:
|
||||
url = 'https://www.facebook.com'
|
||||
# 先获取视频数量
|
||||
if image_key is not None and ".mp4" in image_key:
|
||||
video_count = get_post_count(page, cookies)
|
||||
|
||||
# check_account_status 已经把页面带到 Facebook 首页,避免重复二次跳转导致页面进程崩溃
|
||||
if "facebook.com" not in page.url:
|
||||
retry_goto(page, url)
|
||||
time.sleep(random.randint(3, 10))
|
||||
time.sleep(5)
|
||||
@@ -509,19 +554,49 @@ def playwright_post(cookies, content, image_key=None):
|
||||
post_url = retry_get_new_video(page, cookies, video_count)
|
||||
time.sleep(random.randint(3, 10))
|
||||
|
||||
except Error as e:
|
||||
raise OperationFailed(f'操作超时,请重试{e}')
|
||||
|
||||
screenshot_content = _full_screenshot()
|
||||
if image_key:
|
||||
os.remove(file_path)
|
||||
context.close()
|
||||
browser.close()
|
||||
|
||||
key = f'screenshot/{uuid.uuid4()}.png'
|
||||
put_object(key, screenshot_content)
|
||||
return {'response_url': post_url, 'screenshot_key': key}
|
||||
|
||||
except TimeoutError as e:
|
||||
last_error = e
|
||||
logger.warning(
|
||||
f"发布任务超时,尝试重建浏览器重试: attempt {browser_attempt + 1}/{max_browser_retries}, error={e}"
|
||||
)
|
||||
except Error as e:
|
||||
last_error = e
|
||||
if is_page_crash_error(e):
|
||||
logger.warning(
|
||||
f"发布任务页面崩溃,尝试重建浏览器重试: attempt {browser_attempt + 1}/{max_browser_retries}, error={e}"
|
||||
)
|
||||
else:
|
||||
logger.warning(
|
||||
f"发布任务 Playwright 异常,尝试重试: attempt {browser_attempt + 1}/{max_browser_retries}, error={e}"
|
||||
)
|
||||
finally:
|
||||
if image_key and file_path and os.path.exists(file_path):
|
||||
os.remove(file_path)
|
||||
if context is not None:
|
||||
try:
|
||||
context.close()
|
||||
except Exception:
|
||||
pass
|
||||
if browser is not None:
|
||||
try:
|
||||
browser.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if browser_attempt < max_browser_retries - 1:
|
||||
time.sleep(2)
|
||||
|
||||
if isinstance(last_error, TimeoutError):
|
||||
raise OperationFailed(f'操作超时,请重试: {last_error}')
|
||||
if isinstance(last_error, Error) and is_page_crash_error(last_error):
|
||||
raise OperationFailed(f'页面崩溃,请重试: {last_error}')
|
||||
raise OperationFailed(f'操作失败,请重试: {last_error}')
|
||||
|
||||
|
||||
def playwright_comment(cookies, target_url, content, image_key=None):
|
||||
path = os.path.join(BASE_PATH, 'chrome', '130-0008', 'chrome.exe')
|
||||
@@ -529,9 +604,19 @@ def playwright_comment(cookies, target_url, content, image_key=None):
|
||||
with sync_playwright() as playwright:
|
||||
update_windows_distinguish()
|
||||
|
||||
username = 'moremore_51WM1'
|
||||
password = 'TOv5y0nXCZH_JH+5'
|
||||
country = 'US'
|
||||
|
||||
browser = playwright.chromium.launch(
|
||||
headless=False, args=['--start-maximized'], executable_path=path
|
||||
headless=False, args=['--start-maximized'], executable_path=path,
|
||||
proxy={
|
||||
"server": "http://pr.oxylabs.io:7777", # 必填
|
||||
"username": f"customer-{username}-cc-{country}",
|
||||
"password": password
|
||||
}
|
||||
)
|
||||
|
||||
context = browser.new_context(no_viewport=True)
|
||||
context.add_cookies(parse_cookies(cookies))
|
||||
page = context.new_page()
|
||||
@@ -544,9 +629,9 @@ def playwright_comment(cookies, target_url, content, image_key=None):
|
||||
|
||||
if 'permalink.php?story_fbid' in target_url or '/posts/' in target_url or "/permalink/" in target_url:
|
||||
# 文字或图片类型
|
||||
input_xpath = '//div[@role="dialog"]//span[text()="Comment" or@data-ad-rendering-role="comment_button"]'
|
||||
input_xpath = '//div[@role="dialog"]//span[text()="Comment" or @data-ad-rendering-role="comment_button"] | //div[@role="dialog"]//div[@aria-label="Write a comment…" and @role="textbox" and @contenteditable="true"]'
|
||||
attach_xpath = '//div[@id="focused-state-actions-list"]//div[@aria-label="Attach a photo or video"]'
|
||||
comment_xpath = '//div[@aria-label="Comment"]'
|
||||
comment_xpath = '//div[@aria-label="Comment"] | //div[@id="focused-state-composer-submit"] | //div[@aria-label="Post comment" and @role="button"]'
|
||||
page.click(input_xpath)
|
||||
sleep(1, 2)
|
||||
elif 'watch/?v' in target_url or '/videos/' in target_url:
|
||||
@@ -600,8 +685,17 @@ def playwright_get_user_profile(cookies, username=None):
|
||||
with login_semaphore:
|
||||
with sync_playwright() as playwright:
|
||||
update_windows_distinguish()
|
||||
username = 'moremore_51WM1'
|
||||
password = 'TOv5y0nXCZH_JH+5'
|
||||
country = 'US'
|
||||
|
||||
browser = playwright.chromium.launch(
|
||||
headless=const.HEADLESS, args=['--start-maximized'], executable_path=path
|
||||
headless=const.HEADLESS, args=['--start-maximized'], executable_path=path,
|
||||
proxy={
|
||||
"server": "http://pr.oxylabs.io:7777", # 必填
|
||||
"username": f"customer-{username}-cc-{country}",
|
||||
"password": password
|
||||
}
|
||||
)
|
||||
context = browser.new_context(no_viewport=True)
|
||||
context.add_cookies(parse_cookies(cookies))
|
||||
@@ -758,7 +852,7 @@ def retry_goto(page: "Page", url: str, max_retries: int = 3, retry_delay: int =
|
||||
|
||||
# Make the synchronous goto call within the try block
|
||||
# Use wait_until='load' as in your traceback, or adjust if needed
|
||||
page.goto(url, timeout=30000, wait_until="domcontentloaded") # Use the synchronous call
|
||||
page.goto(url, timeout=180000) # Use the synchronous call
|
||||
|
||||
print(f"Successfully navigated to {url} on attempt {attempt + 1}")
|
||||
break # Navigation was successful, exit the retry loop
|
||||
@@ -772,11 +866,15 @@ def retry_goto(page: "Page", url: str, max_retries: int = 3, retry_delay: int =
|
||||
print(f"All {max_retries} attempts failed for {url}.")
|
||||
# If all retries fail, re-raise the exception
|
||||
raise e
|
||||
except Error as e:
|
||||
if is_page_crash_error(e):
|
||||
logger.error(f"Navigation page crashed for {url} on attempt {attempt + 1}: {e}")
|
||||
else:
|
||||
logger.error(f"Navigation playwright error for {url} on attempt {attempt + 1}: {e}")
|
||||
raise e
|
||||
except Exception as e:
|
||||
# Catch any other unexpected errors during goto
|
||||
print(f"An unexpected error occurred during navigation to {url} on attempt {attempt + 1}: {e}")
|
||||
# Decide if other exceptions should also trigger retries
|
||||
# For now, we'll just re-raise other exceptions immediately
|
||||
raise e
|
||||
|
||||
|
||||
@@ -992,14 +1090,21 @@ def playwright_m_login(username, password, code_2fa=None):
|
||||
max_proxy_retries = 3
|
||||
proxy_retry_count = 0
|
||||
while proxy_retry_count < max_proxy_retries:
|
||||
proxy_url = ProxyChecker(timeout=8).get_valid_proxy_url()
|
||||
if proxy_url is None:
|
||||
raise OperationFailed("获取代理失败")
|
||||
logger.info(f"使用proxy={proxy_url}")
|
||||
# proxy_url = ProxyChecker(timeout=8).get_valid_proxy_url()
|
||||
# if proxy_url is None:
|
||||
# raise OperationFailed("获取代理失败")
|
||||
# logger.info(f"使用proxy={proxy_url}")
|
||||
|
||||
username = 'moremore_51WM1'
|
||||
password = 'TOv5y0nXCZH_JH+5'
|
||||
country = 'US'
|
||||
|
||||
browser = playwright.chromium.launch(
|
||||
headless=const.HEADLESS, args=['--start-maximized'], executable_path=path, proxy={
|
||||
"server": proxy_url,
|
||||
headless=const.HEADLESS, args=['--start-maximized'], executable_path=path,
|
||||
proxy={
|
||||
"server": "http://pr.oxylabs.io:7777", # 必填
|
||||
"username": f"customer-{username}-cc-{country}",
|
||||
"password": password
|
||||
}
|
||||
)
|
||||
# random_user_agent = UserAgent().getBrowser(["Chrome Mobile iOS"]).get("useragent")
|
||||
@@ -1117,9 +1222,19 @@ def playwright_share(cookies, target_url, content):
|
||||
with sync_playwright() as playwright:
|
||||
update_windows_distinguish()
|
||||
|
||||
username = 'moremore_51WM1'
|
||||
password = 'TOv5y0nXCZH_JH+5'
|
||||
country = 'US'
|
||||
|
||||
browser = playwright.chromium.launch(
|
||||
headless=False, args=['--start-maximized'], executable_path=path
|
||||
headless=False, args=['--start-maximized'], executable_path=path,
|
||||
proxy={
|
||||
"server": "http://pr.oxylabs.io:7777", # 必填
|
||||
"username": f"customer-{username}-cc-{country}",
|
||||
"password": password
|
||||
}
|
||||
)
|
||||
|
||||
context = browser.new_context(no_viewport=True)
|
||||
context.add_cookies(parse_cookies(cookies))
|
||||
page = context.new_page()
|
||||
@@ -1181,8 +1296,8 @@ def playwright_share(cookies, target_url, content):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
cookies = "c_user=61587708421547; xs=32:70ZigyqtoCfQ0w:2:1770569737:-1:-1; oo=v13:1770569731; datr=A8CIafB8DHVOhB7TfY9GWAIG"
|
||||
print(playwright_check_account_cookies(cookies))
|
||||
cookies = {"c_user":"61585626484995","datr":"NGBPadlTOQH5fwTPcUTGtLI5","dpr":"0.3400000035762787","fr":"0OHgdNICbgL23ZzCq..BptI5c..AAA.0.0.BptI5l.AWf9nkRp50sAbeHCdwhX--SsOVA","m_pixel_ratio":"0.3400000035762787","ps_l":"1","ps_n":"1","sb":"-Y-0adE6TkW7Fd070vzZxn5H","useragent":"TW96aWxsYS81LjAgKExpbnV4OyBBbmRyb2lkIDEwOyBGaW5kIFg1IFBybyBCdWlsZC9UUDFBLjIyMTAwNS4wMDI7IHd2KSBBcHBsZVdlYktpdC81MzcuMzYgKEtIVE1MLCBsaWtlIEdlY2tvKSBWZXJzaW9uLzQuMCBDaHJvbWUvMTI3LjAuMC4wIE1vYmlsZSBTYWZhcmkvNTM3LjM2IFtGQkFOL0VNQTtGQkxDL2VuX1VTO0ZCQVYvNDE5LjAuMC4xMC4xMjA7XQ%3D%3D","wd":"500x402","x-referer":"eyJyIjoiL3NlY3VyaXR5LzJmYWMvc2V0dXAvcXJjb2RlL2dlbmVyYXRlLz9leHQ9MTc3MzcwMDE2NiZoYXNoPUFlY2Vld0J0M0Q3UnhaMDgwTVBYMHR0UDFKUSIsImgiOiIvc2VjdXJpdHkvMmZhYy9zZXR1cC9xcmNvZGUvZ2VuZXJhdGUvP2V4dD0xNzczNzAwMTY2Jmhhc2g9QWVjZWV3QnQzRDdSeFowODBNUFgwdHRQMUpRIiwicyI6Im0ifQ%3D%3D","xs":"42:KzK0Y_fjLkJzUQ:2:1766809815:-1:-1"}
|
||||
# print(playwright_check_account_cookies(cookies))
|
||||
# print(playwright_set_user_profile(cookies, "61584735094876", "Inaaya", "Inaaya",
|
||||
# "facebook/user_upload/0196f098-851c-7810-b2aa-0833a0a7b09d/8c428558-2d90-4f45-baa3-a25f8a654b5c.png"))
|
||||
# cookies = '{"locale": "en_US", "datr": "ZnGnaBBx0yN7pov19-8_A6Gr", "sb": "ZnGnaDQicDSsVuevkudqio1J", "m_pixel_ratio": "1", "wd": "1920x1080", "test_cookie": "CheckForPermission", "c_user": "61579364283503", "xs": "34%3AdWeZoaWzFrtdVQ%3A2%3A1755804022%3A-1%3A-1", "oo": "v1%7C3%3A1755804031"}'
|
||||
@@ -1194,7 +1309,8 @@ if __name__ == '__main__':
|
||||
# post(cookies, 'cs2025')
|
||||
# like(cookies, 'ZmVlZGJhY2s6MTIyMTA5NjE0NjU0NzkzNzc5')
|
||||
# comment(cookies, 'ZmVlZGJhY2s6MTIyMTA5NjE0NjU0NzkzNzc5', 'game la', 'xzpq.mp4')
|
||||
# playwright_like(cookies, 'https://www.facebook.com/watch/?v=1007800324567828')
|
||||
# playwright_like(cookies,
|
||||
# 'https://www.facebook.com/permalink.php?story_fbid=pfbid026dQik7NyujSGd2rjWyTKbQVw2CVM5BeJEP9sYxo7K5TBGhWZ4sUfs4gMoSLKq2mJl&id=61575372223339')
|
||||
# print(playwright_post(cookies, '1111', image_key="1.mp4"))
|
||||
# playwright_post(cookies, '2025-3-26~like', "")
|
||||
# playwright_comment(
|
||||
@@ -1206,7 +1322,7 @@ if __name__ == '__main__':
|
||||
# '2025-3-26~like',
|
||||
# # 'rg.jpg'
|
||||
# )
|
||||
# print(playwright_get_user_profile(cookies))
|
||||
print(playwright_get_user_profile(cookies))
|
||||
# print(_change_language(cookies))
|
||||
# playwright_set_user_profile(
|
||||
# cookies,
|
||||
|
||||
Reference in New Issue
Block a user