优化发帖
All checks were successful
Update Code / StopService (windows-101.36.102.136) (push) Successful in 1s
Update Code / StopService (windows-101.36.104.175) (push) Successful in 1s
Update Code / CD (windows-101.36.102.136) (push) Successful in 8s
Update Code / CD (windows-101.36.104.175) (push) Successful in 12s
All checks were successful
Update Code / StopService (windows-101.36.102.136) (push) Successful in 1s
Update Code / StopService (windows-101.36.104.175) (push) Successful in 1s
Update Code / CD (windows-101.36.102.136) (push) Successful in 8s
Update Code / CD (windows-101.36.104.175) (push) Successful in 12s
This commit is contained in:
203
spider/task.py
203
spider/task.py
@@ -432,73 +432,173 @@ def retry_get_new_video(page, cookies, post_count):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
page: Playwright页面对象
|
page: Playwright页面对象
|
||||||
cookies: Cookies
|
cookies: Cookies (此处未使用,保留接口)
|
||||||
post_count: 初始帖子数量
|
post_count: 初始帖子数量
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
函数执行结果或抛出Timeout(如果超时)
|
新视频的页面URL
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
TimeoutError: 超时未获取到新视频
|
||||||
|
OperationFailed: 点击视频时出错
|
||||||
"""
|
"""
|
||||||
max_duration = 10 * 60 # 5分钟(秒)
|
max_duration = 10 * 60 # 5分钟(300秒)
|
||||||
retry_interval = 30 # 30秒重试一次
|
retry_interval = 30 # 30秒重试一次
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
attempt = 1
|
attempt = 1
|
||||||
|
|
||||||
while time.time() - start_time < max_duration:
|
while time.time() - start_time < max_duration:
|
||||||
# 获取当前帖子数量
|
# 1. 滚动到页面底部,触发懒加载新内容
|
||||||
|
print("滚动到页面底部,加载更多内容...")
|
||||||
|
page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
|
||||||
|
# 等待新内容加载(可调整时间或使用网络空闲等待)
|
||||||
|
page.wait_for_timeout(3000) # 等待3秒让新帖子渲染
|
||||||
|
# 可选:等待网络空闲确保动态内容加载完成
|
||||||
|
# page.wait_for_load_state("networkidle")
|
||||||
|
|
||||||
|
# 2. 获取当前帖子数量
|
||||||
new_post_count = get_post_count(page, cookies)
|
new_post_count = get_post_count(page, cookies)
|
||||||
# 如果新帖子数量大于初始帖子数量,则表示有新帖子上传
|
print(f"第{attempt}次检查: 初始帖子数={post_count}, 当前帖子数={new_post_count}")
|
||||||
|
|
||||||
|
# 3. 如果有新帖子(视频)
|
||||||
if new_post_count > post_count:
|
if new_post_count > post_count:
|
||||||
|
print("检测到新视频,准备点击...")
|
||||||
try:
|
try:
|
||||||
# 尝试点击视频
|
# 获取所有带 aria-posinset 的 div(代表每个视频帖子)
|
||||||
comment_buttons = page.query_selector_all('//div[@aria-posinset]')
|
comment_buttons = page.query_selector_all('//div[@aria-posinset]')
|
||||||
if comment_buttons:
|
if comment_buttons:
|
||||||
# 使用js去点击第一个评论按钮
|
# 使用第一个视频元素(最新发布的通常在第一个)
|
||||||
element = page.query_selector_all('//div[@aria-posinset]')[0]
|
element = comment_buttons[0]
|
||||||
# 获取元素位置
|
|
||||||
bounding_box = element.bounding_box()
|
bounding_box = element.bounding_box()
|
||||||
|
|
||||||
if bounding_box:
|
if bounding_box:
|
||||||
# 滚动到特定位置
|
# 滚动到元素可视区域中央
|
||||||
page.evaluate(
|
page.evaluate(
|
||||||
f''' () => {{ window.scrollTo({{ top: {bounding_box['y']} - window.innerHeight / 2, left: {bounding_box['x']} - window.innerWidth / 2, behavior: 'smooth' }}); }} ''')
|
f''' () => {{
|
||||||
|
window.scrollTo({{
|
||||||
|
top: {bounding_box['y']} - window.innerHeight / 2,
|
||||||
|
left: {bounding_box['x']} - window.innerWidth / 2,
|
||||||
|
behavior: 'smooth'
|
||||||
|
}});
|
||||||
|
}} '''
|
||||||
|
)
|
||||||
|
page.wait_for_timeout(1000) # 等待滚动完成
|
||||||
|
|
||||||
# 等待滚动完成
|
# 点击视频
|
||||||
page.wait_for_timeout(1000)
|
|
||||||
|
|
||||||
# 点击
|
|
||||||
element.click()
|
element.click()
|
||||||
|
# 等待视频页面加载
|
||||||
time.sleep(random.randint(3, 5))
|
time.sleep(random.randint(3, 5))
|
||||||
|
# 刷新页面确保视频播放器完全加载(可选)
|
||||||
page.reload(timeout=180000)
|
page.reload(timeout=180000)
|
||||||
time.sleep(random.randint(3, 5))
|
time.sleep(random.randint(3, 5))
|
||||||
return page.url
|
return page.url
|
||||||
else:
|
else:
|
||||||
raise OperationFailed(f"未找到视频")
|
raise OperationFailed("未找到视频元素的有效位置信息")
|
||||||
|
else:
|
||||||
|
raise OperationFailed("未找到任何视频元素(div[@aria-posinset])")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise OperationFailed(f"点击视频时出错: {e}")
|
raise OperationFailed(f"点击视频时出错: {e}")
|
||||||
|
|
||||||
# 计算下一次重试时间
|
# 4. 没有新视频,等待下一次重试
|
||||||
elapsed = time.time() - start_time
|
elapsed = time.time() - start_time
|
||||||
remaining_time = max_duration - elapsed
|
remaining_time = max_duration - elapsed
|
||||||
|
|
||||||
if remaining_time > 0:
|
if remaining_time > 0:
|
||||||
# 等待30秒或剩余时间(取较小值)
|
|
||||||
sleep_time = min(retry_interval, remaining_time)
|
sleep_time = min(retry_interval, remaining_time)
|
||||||
print(f"第{attempt}次尝试,等待 {sleep_time:.1f} 秒后重试... (剩余时间: {remaining_time:.1f}秒)")
|
print(f"第{attempt}次尝试未发现新视频,等待 {sleep_time:.1f} 秒后重试... (剩余时间: {remaining_time:.1f}秒)")
|
||||||
time.sleep(sleep_time)
|
time.sleep(sleep_time)
|
||||||
|
|
||||||
attempt += 1
|
attempt += 1
|
||||||
|
|
||||||
|
# 超时退出
|
||||||
print("5分钟超时,退出重试")
|
print("5分钟超时,退出重试")
|
||||||
raise TimeoutError("未获取到新视频(可能视频上传失败),已超时")
|
raise TimeoutError("未获取到新视频(可能视频上传失败),已超时")
|
||||||
|
|
||||||
|
|
||||||
def playwright_post(cookies, content, image_key=None):
|
def _is_video_media(image_key, video_extensions=('.mp4',)):
    """Tell whether an attachment key names a video file.

    Args:
        image_key: Key (or file name) of the attachment. May be ``None`` or
            empty when the post carries no media.
        video_extensions: File suffixes that count as video. Defaults to
            ``('.mp4',)``, which is the only suffix this check recognised
            before the parameter existed.

    Returns:
        ``True`` when ``image_key`` is non-empty and ends with one of
        ``video_extensions`` (compared case-insensitively), else ``False``.
    """
    if not image_key:
        return False
    # str.endswith accepts a tuple, so one call covers every suffix.
    suffixes = tuple(ext.lower() for ext in video_extensions)
    return image_key.lower().endswith(suffixes)
|
||||||
|
|
||||||
|
|
||||||
|
def _download_post_media(image_key):
    """Download the post attachment to a uniquely named local file.

    Args:
        image_key: Key of the attachment inside ``BUCKET``. ``None`` or an
            empty string means the post has no media.

    Returns:
        Path of the downloaded file under ``BASE_PATH/files``, or ``None``
        when no key was given.
    """
    if image_key:
        base_name = image_key.split('/')[-1]
        # A random uuid prefix keeps repeated downloads of the same key from
        # writing to the same local path.
        local_path = os.path.join(BASE_PATH, 'files', f"{uuid.uuid4()}_{base_name}")
        # NOTE(review): `client` looks like an object-storage client with a
        # MinIO-style fget_object(bucket, key, path) call - confirm.
        client.fget_object(BUCKET, image_key, local_path)
        return local_path
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def _open_post_composer(page, has_media):
    """Return the visible post-composer text box, opening the dialog if needed.

    Args:
        page: Page object exposing ``locator()`` (a Playwright page in the
            caller shown in this file).
        has_media: When true, no trigger is clicked and the function only
            waits for the composer to become visible.

    Returns:
        The first locator matching the composer text box, after it has
        become visible.

    Raises:
        OperationFailed: Every trigger selector failed with ``Error``; the
            last such error is attached as ``__cause__``.
    """
    composer_xpath = '//div[contains(@aria-placeholder, "What\'s on your mind")]'

    def _visible_composer():
        # Single place for the selector and the 5-minute visibility wait.
        composer = page.locator(composer_xpath).first
        composer.wait_for(state='visible', timeout=300000)
        return composer

    if has_media:
        return _visible_composer()

    triggers = (
        '//span[contains(text(), "What\'s on your mind")]',
        '//div[@role="button"]//span[contains(text(), "What\'s on your mind")]',
    )
    last_error = None
    for selector in triggers:
        try:
            page.locator(selector).first.click(timeout=30000)
            return _visible_composer()
        except Error as e:
            # Remember the failure and fall through to the next trigger.
            last_error = e

    # Chain the cause so the traceback of the underlying failure is kept.
    raise OperationFailed(f'未能打开发布输入框: {last_error}') from last_error
|
||||||
|
|
||||||
|
|
||||||
|
def _fill_post_content(page, content, has_media):
    """Type the post text into the composer.

    Args:
        page: Page object passed through to ``_open_post_composer``.
        content: Text to put in the composer.
        has_media: Forwarded to ``_open_post_composer``.
    """
    text_box = _open_post_composer(page, has_media=has_media)
    # fill() is given up to 5 minutes.
    text_box.fill(content, timeout=300000)
|
||||||
|
|
||||||
|
|
||||||
|
def _wait_post_submit_result(page, wait_ms=15000):
    """Pause after submitting a post.

    This is a fixed delay; it does not inspect the page for a success or
    failure indicator.

    Args:
        page: Page object exposing ``wait_for_timeout()``.
        wait_ms: Delay in milliseconds. Defaults to 15000, the value that was
            previously hard-coded.
    """
    page.wait_for_timeout(wait_ms)
|
||||||
|
|
||||||
|
|
||||||
|
def _get_latest_post_url(page):
    """Find the URL of a post link on the current page.

    Selectors are tried in order; within each, the first link whose ``href``
    contains ``/posts/`` or ``permalink`` wins.

    Args:
        page: Page object exposing ``locator()``, ``reload()`` and ``url``.

    Returns:
        An absolute post URL. If no matching link exists, the page is
        reloaded and ``page.url`` is returned instead.
        NOTE(review): that fallback may not be a post URL - callers that
        need a real permalink should verify it.
    """
    from urllib.parse import urljoin

    candidates = (
        '//div[@aria-posinset="1"]//a[@role="link"]',
        '//a[contains(@href, "/posts/")]',
        '//a[contains(@href, "permalink")]',
    )
    for selector in candidates:
        locator = page.locator(selector)
        # range(0) is empty, so a selector with no matches is skipped.
        for index in range(locator.count()):
            href = locator.nth(index).get_attribute('href')
            if not href or not ('/posts/' in href or 'permalink' in href):
                continue
            if href.startswith(('http://', 'https://')):
                # Already absolute: hand it back untouched.
                return href
            # Resolves "/path", "path?query" and "//host/path" alike; the
            # old prefix-only handling left the latter two broken.
            return urljoin('https://www.facebook.com/', href)

    page.reload(timeout=180000)
    return page.url
|
||||||
|
|
||||||
|
|
||||||
|
def _is_facebook_home(page):
    """Tell whether the page is currently on the Facebook home page.

    The URL is parsed rather than compared as a string, so a home URL that
    carries a query string or fragment (for example ``/?sk=h_chr``) is still
    recognised.

    Args:
        page: Object exposing a ``url`` attribute (may be ``None`` or empty).

    Returns:
        ``True`` for an ``https`` URL on ``www.facebook.com`` or
        ``facebook.com`` whose path is empty or only slashes; else ``False``.
    """
    from urllib.parse import urlsplit

    try:
        parts = urlsplit(page.url or '')
    except ValueError:
        # urlsplit rejects some malformed URLs; those are not the home page.
        return False
    return (
        parts.scheme == 'https'
        and parts.hostname in {'www.facebook.com', 'facebook.com'}
        and parts.path.strip('/') == ''
    )
|
||||||
|
|
||||||
|
|
||||||
|
def playwright_post(cookies, content, image_key=None, dry_run=False):
|
||||||
path = os.path.join(BASE_PATH, 'chrome', '130-0008', 'chrome.exe')
|
path = os.path.join(BASE_PATH, 'chrome', '130-0008', 'chrome.exe')
|
||||||
with lock:
|
with lock:
|
||||||
with sync_playwright() as playwright:
|
with sync_playwright() as playwright:
|
||||||
update_windows_distinguish()
|
update_windows_distinguish()
|
||||||
max_browser_retries = 3
|
max_browser_retries = 3
|
||||||
last_error = None
|
last_error = None
|
||||||
|
parsed_cookies = parse_cookies(cookies)
|
||||||
|
is_video = _is_video_media(image_key)
|
||||||
|
|
||||||
for browser_attempt in range(max_browser_retries):
|
for browser_attempt in range(max_browser_retries):
|
||||||
browser = None
|
browser = None
|
||||||
@@ -509,51 +609,52 @@ def playwright_post(cookies, content, image_key=None):
|
|||||||
headless=False, args=['--start-maximized'], executable_path=path
|
headless=False, args=['--start-maximized'], executable_path=path
|
||||||
)
|
)
|
||||||
context = browser.new_context(no_viewport=True)
|
context = browser.new_context(no_viewport=True)
|
||||||
context.add_cookies(parse_cookies(cookies))
|
context.add_cookies(parsed_cookies)
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
|
page.set_default_timeout(30000)
|
||||||
|
page.set_default_navigation_timeout(180000)
|
||||||
|
|
||||||
check_account_status(page, parse_cookies(cookies))
|
check_account_status(page, parsed_cookies)
|
||||||
# 声明默认发布视频数量
|
|
||||||
video_count = 0
|
video_count = 0
|
||||||
|
|
||||||
url = 'https://www.facebook.com'
|
url = 'https://www.facebook.com'
|
||||||
# 先获取视频数量
|
if is_video:
|
||||||
if image_key is not None and ".mp4" in image_key:
|
|
||||||
video_count = get_post_count(page, cookies)
|
video_count = get_post_count(page, cookies)
|
||||||
|
|
||||||
# check_account_status 已经把页面带到 Facebook 首页,避免重复二次跳转导致页面进程崩溃
|
if not _is_facebook_home(page):
|
||||||
if "facebook.com" not in page.url:
|
|
||||||
retry_goto(page, url)
|
retry_goto(page, url)
|
||||||
time.sleep(random.randint(3, 10))
|
sleep(3, 5)
|
||||||
time.sleep(5)
|
|
||||||
|
|
||||||
if image_key:
|
if image_key:
|
||||||
filename = image_key.split('/')[-1]
|
file_path = _download_post_media(image_key)
|
||||||
file_path = os.path.join(BASE_PATH, 'files', filename)
|
|
||||||
client.fget_object(BUCKET, image_key, file_path)
|
|
||||||
|
|
||||||
sleep(1, 2)
|
sleep(1, 2)
|
||||||
page.locator('input[accept="image/*,image/heif,image/heic,video/*,video/mp4,video/x-m4v,video/x-matroska,.mkv"]').set_input_files(file_path)
|
page.locator('input[accept="image/*,image/heif,image/heic,video/*,video/mp4,video/x-m4v,video/x-matroska,.mkv"]').set_input_files(file_path)
|
||||||
time.sleep(5)
|
page.locator('//div[contains(@aria-placeholder, "What\'s on your mind")]').first.wait_for(
|
||||||
|
state='visible', timeout=300000
|
||||||
|
)
|
||||||
|
|
||||||
if not image_key:
|
_fill_post_content(page, content, has_media=bool(image_key))
|
||||||
page.click('''//span[contains(text(), "What's on your mind")]''')
|
|
||||||
_edit_privacy(page)
|
_edit_privacy(page)
|
||||||
# 修改后 (使用 fill)
|
|
||||||
page.fill('//div[contains(@aria-placeholder, "What\'s on your mind")]', content,
|
|
||||||
timeout=300000)
|
|
||||||
page.click('//div[@aria-label="Post"]', timeout=300000)
|
|
||||||
time.sleep(15)
|
|
||||||
post_index = page.locator('//div[@aria-posinset="1"]//a[@role="link"]').nth(2)
|
|
||||||
post_index.click(timeout=600000)
|
|
||||||
time.sleep(5)
|
|
||||||
page.reload(timeout=180000)
|
|
||||||
post_url = page.url
|
|
||||||
# 视频格式要单独去获取链接
|
|
||||||
if image_key is not None and ".mp4" in image_key:
|
|
||||||
post_url = retry_get_new_video(page, cookies, video_count)
|
|
||||||
time.sleep(random.randint(3, 10))
|
|
||||||
|
|
||||||
|
post_button = page.locator('//div[@aria-label="Post"]').first
|
||||||
|
post_button.wait_for(state='visible', timeout=300000)
|
||||||
|
if dry_run:
|
||||||
|
screenshot_content = _full_screenshot()
|
||||||
|
key = f'screenshot/{uuid.uuid4()}.png'
|
||||||
|
put_object(key, screenshot_content)
|
||||||
|
return {
|
||||||
|
'response_url': page.url,
|
||||||
|
'screenshot_key': key,
|
||||||
|
'dry_run': True,
|
||||||
|
'message': '已完成到发布前校验,未实际点击 Post'
|
||||||
|
}
|
||||||
|
post_button.click(timeout=300000)
|
||||||
|
_wait_post_submit_result(page)
|
||||||
|
|
||||||
|
if is_video:
|
||||||
|
post_url = retry_get_new_video(page, cookies, video_count)
|
||||||
|
else:
|
||||||
|
post_url = _get_latest_post_url(page)
|
||||||
screenshot_content = _full_screenshot()
|
screenshot_content = _full_screenshot()
|
||||||
key = f'screenshot/{uuid.uuid4()}.png'
|
key = f'screenshot/{uuid.uuid4()}.png'
|
||||||
put_object(key, screenshot_content)
|
put_object(key, screenshot_content)
|
||||||
|
|||||||
76
test_playwright_post.py
Normal file
76
test_playwright_post.py
Normal file
@@ -0,0 +1,76 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
import uuid
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
import spider.task as task_module
|
||||||
|
|
||||||
|
|
||||||
|
# Test parameters.
#
# Session cookies are credentials and must not live in a committed file, so
# they are read from environment variables:
#   FB_COOKIE_C_USER, FB_COOKIE_DATR, FB_COOKIE_FR, FB_COOKIE_XS
# A cookie whose variable is unset stays an empty string, which
# _validate_config() reports as a missing field before any browser starts.
# NOTE(review): the cookie values that used to be hard-coded here were pushed
# to the repository; treat that session as compromised and revoke it.
COOKIES = {
    name: os.environ.get(f"FB_COOKIE_{name.upper()}", "")
    for name in ("c_user", "datr", "fr", "xs")
}

CONTENT = "International rankings consistently place the Philippines high on corruption perception. This damages foreign investment and our global standing. Let's change this narrative.#PoliticalDynastyCorruption"

# Local video to upload; set FB_TEST_VIDEO_PATH to override on other machines.
LOCAL_VIDEO_PATH = os.environ.get(
    "FB_TEST_VIDEO_PATH",
    r"E:\Code\Python\facebook\files\e2b8eaad-f950-46b6-9268-634d697f1ac9.mp4",
)

# Passed to playwright_post(dry_run=...); False means the post is really sent.
DRY_RUN = False
|
||||||
|
|
||||||
|
|
||||||
|
def _validate_config():
    """Fail fast when the module-level test parameters are unusable.

    Raises:
        ValueError: ``COOKIES`` is empty or has a blank/``None`` value,
            ``CONTENT`` or ``LOCAL_VIDEO_PATH`` is blank, or the video file
            does not have an ``.mp4`` suffix.
        FileNotFoundError: ``LOCAL_VIDEO_PATH`` does not exist.
    """
    # An empty mapping has no blank values, so it must be rejected explicitly.
    if not COOKIES:
        raise ValueError("COOKIES 不能为空")

    # str(None) is "None", which would otherwise pass the blank check.
    missing = [
        key for key, value in COOKIES.items()
        if value is None or not str(value).strip()
    ]
    if missing:
        raise ValueError(f"cookies 缺少字段: {', '.join(missing)}")

    if not CONTENT.strip():
        raise ValueError("CONTENT 不能为空")

    if not LOCAL_VIDEO_PATH.strip():
        raise ValueError("LOCAL_VIDEO_PATH 不能为空")

    video_path = Path(LOCAL_VIDEO_PATH)
    if not video_path.exists():
        raise FileNotFoundError(f"视频文件不存在: {video_path}")

    if video_path.suffix.lower() != ".mp4":
        raise ValueError(f"当前测试文件仅按 mp4 视频发布流程处理: {video_path}")
|
||||||
|
|
||||||
|
|
||||||
|
def _prepare_local_video(video_path_str):
    """Copy a local video into the task module's files directory.

    Args:
        video_path_str: Path of the source video.

    Returns:
        Path (as ``str``) of the copy, named ``<uuid4>_<original name>`` under
        ``task_module.BASE_PATH/files``. The directory is created if absent.
    """
    original = Path(video_path_str)
    files_dir = Path(task_module.BASE_PATH) / "files"
    staged = files_dir / f"{uuid.uuid4()}_{original.name}"
    files_dir.mkdir(parents=True, exist_ok=True)
    # copy2 also carries over file metadata such as timestamps.
    shutil.copy2(original, staged)
    return str(staged)
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run ``playwright_post`` once against a local video file.

    ``task_module._download_post_media`` is temporarily replaced so the post
    uses a copy of ``LOCAL_VIDEO_PATH`` instead of downloading by key. The
    original function is restored, and the staged copies are deleted, even
    when posting fails.
    """
    _validate_config()

    logger.add("./log/test_playwright_post.log", rotation="20 MB")

    original_download = task_module._download_post_media
    staged_copies = []

    def _download_post_media_for_test(_image_key):
        # The key is ignored: the test always serves the local video.
        staged = _prepare_local_video(LOCAL_VIDEO_PATH)
        staged_copies.append(staged)
        return staged

    task_module._download_post_media = _download_post_media_for_test

    try:
        result = task_module.playwright_post(
            cookies=COOKIES,
            content=CONTENT,
            image_key=os.path.basename(LOCAL_VIDEO_PATH),
            dry_run=DRY_RUN,
        )
        logger.info("发布结果: {}", result)
        print(json.dumps(result, ensure_ascii=False, indent=2))
    finally:
        task_module._download_post_media = original_download
        # Each run stages a fresh uuid-named copy; remove them so repeated
        # runs do not fill the files directory.
        for staged in staged_copies:
            try:
                Path(staged).unlink(missing_ok=True)
            except OSError as exc:
                # Cleanup is best-effort and must not mask the real outcome.
                logger.warning("清理临时视频失败: {} ({})", staged, exc)
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the manual posting test only when executed directly.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
Reference in New Issue
Block a user