更改为loguru, 增加随机ua, 增加隐式等待
This commit is contained in:
23
main.py
23
main.py
@@ -1,6 +1,7 @@
|
|||||||
from concurrent.futures.thread import ThreadPoolExecutor
|
|
||||||
from spider.task import *
|
from spider.task import *
|
||||||
from logger import error_logger, record_full_log
|
from loguru import logger
|
||||||
|
|
||||||
|
logger.add("./log/logging.log", rotation="50 MB")
|
||||||
|
|
||||||
TASK_TYPE = {
|
TASK_TYPE = {
|
||||||
'get_account_profile': playwright_get_user_profile,
|
'get_account_profile': playwright_get_user_profile,
|
||||||
@@ -23,10 +24,10 @@ def get_task():
|
|||||||
'Content-Type': 'application/json'
|
'Content-Type': 'application/json'
|
||||||
}
|
}
|
||||||
data = {
|
data = {
|
||||||
"include_task_type": ["login_account"],
|
"include_task_type": ["login_account", "get_account_profile"],
|
||||||
"exclude_task_type": []
|
"exclude_task_type": []
|
||||||
}
|
}
|
||||||
response = requests.post(url, headers=header, json=data)
|
response = requests.post(url, headers=header, json=data, proxies=None)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
result = response.json()
|
result = response.json()
|
||||||
return result
|
return result
|
||||||
@@ -39,13 +40,15 @@ def task_callback(tid, data, status='success', msg='success'):
|
|||||||
'data': data,
|
'data': data,
|
||||||
'message': msg,
|
'message': msg,
|
||||||
}
|
}
|
||||||
|
logger.info(f"回调任务: tid:{tid}, status:{status}, data:{data}, msg:{msg}")
|
||||||
response = requests.post(
|
response = requests.post(
|
||||||
f'{HOST}/queue/handle-data',
|
f'{HOST}/queue/handle-data',
|
||||||
json=body
|
json=body,
|
||||||
|
proxies=None
|
||||||
)
|
)
|
||||||
result = response.json()
|
result = response.json()
|
||||||
if response.status_code != 200:
|
if response.status_code != 200:
|
||||||
raise RuntimeError(f"任务回调失败:{result['msg']}")
|
raise RuntimeError(f"任务回调失败:code={response.status_code} text={result.text}")
|
||||||
|
|
||||||
|
|
||||||
def execute_task(tid, task_type, **kwargs):
|
def execute_task(tid, task_type, **kwargs):
|
||||||
@@ -53,10 +56,10 @@ def execute_task(tid, task_type, **kwargs):
|
|||||||
result = TASK_TYPE.get(task_type)(**kwargs)
|
result = TASK_TYPE.get(task_type)(**kwargs)
|
||||||
task_callback(tid, data=result)
|
task_callback(tid, data=result)
|
||||||
except (AuthException, OperationFailed) as e:
|
except (AuthException, OperationFailed) as e:
|
||||||
record_full_log(error_logger, e)
|
logger.exception("账号或操作异常")
|
||||||
task_callback(tid, data={}, status=e.error_type, msg=str(e))
|
task_callback(tid, data={}, status=e.error_type, msg=str(e))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
record_full_log(error_logger, e)
|
logger.exception("未捕获异常")
|
||||||
task_callback(tid, data={}, status='failed', msg=str(e))
|
task_callback(tid, data={}, status='failed', msg=str(e))
|
||||||
|
|
||||||
|
|
||||||
@@ -65,13 +68,15 @@ def main():
|
|||||||
try:
|
try:
|
||||||
task = get_task()
|
task = get_task()
|
||||||
if task is None:
|
if task is None:
|
||||||
|
logger.info("无任务")
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
continue
|
continue
|
||||||
|
logger.info(f"收到任务{task}")
|
||||||
task['data']['tid'] = task['id']
|
task['data']['tid'] = task['id']
|
||||||
task['data']['task_type'] = task['task_type']
|
task['data']['task_type'] = task['task_type']
|
||||||
execute_task(**task['data'])
|
execute_task(**task['data'])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
error_logger.error(f'Main Error: {e}')
|
logger.error(f'Main Error: {e}')
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
159
spider/task.py
159
spider/task.py
@@ -10,14 +10,16 @@ import uuid
|
|||||||
import pyotp
|
import pyotp
|
||||||
import pywintypes
|
import pywintypes
|
||||||
import requests
|
import requests
|
||||||
|
from fake_useragent import UserAgent # 导入 UserAgent
|
||||||
import win32api
|
import win32api
|
||||||
import win32con
|
import win32con
|
||||||
from PIL import ImageGrab
|
from PIL import ImageGrab
|
||||||
|
from loguru import logger
|
||||||
|
from playwright._impl._page import Page
|
||||||
from playwright.sync_api import sync_playwright, Error, TimeoutError
|
from playwright.sync_api import sync_playwright, Error, TimeoutError
|
||||||
|
|
||||||
from const import BUCKET, BASE_PATH
|
from const import BUCKET, BASE_PATH
|
||||||
from exceptions import AuthException, OperationFailed
|
from exceptions import AuthException, OperationFailed
|
||||||
from logger import error_logger
|
|
||||||
from miniofile import client, put_object
|
from miniofile import client, put_object
|
||||||
|
|
||||||
|
|
||||||
@@ -737,18 +739,30 @@ def update_windows_distinguish(x=1920, y=1080):
|
|||||||
|
|
||||||
|
|
||||||
def _change_language(page):
|
def _change_language(page):
|
||||||
|
# 判断是否为英文
|
||||||
|
lang = page.locator('html').get_attribute('lang')
|
||||||
|
if lang == "en":
|
||||||
|
return
|
||||||
|
|
||||||
sleep(1, 2)
|
sleep(1, 2)
|
||||||
page.locator('//*[@style="height:40px;width:40px"]').first.click()
|
page.locator('//*[@style="height:40px;width:40px"]').first.click()
|
||||||
sleep(1, 2)
|
sleep(1, 2)
|
||||||
page.click('//div[@role="listitem" and @class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"][1]')
|
|
||||||
|
# 点击设置图标
|
||||||
|
page.wait_for_selector(
|
||||||
|
'//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -419px")]',
|
||||||
|
timeout=10000).click()
|
||||||
|
|
||||||
|
# 点击语言
|
||||||
|
page.wait_for_selector('//div[@role="menu"]/div[2]', timeout=10000).click()
|
||||||
|
|
||||||
|
page.wait_for_selector(
|
||||||
|
'//i[@data-visualcompletion="css-img" and contains(@style, "background-position: 0px -793px")]',
|
||||||
|
timeout=10000).click()
|
||||||
|
|
||||||
sleep(1, 2)
|
sleep(1, 2)
|
||||||
page.click('//div[@role="menu"]/div[2]')
|
page.click('//span[text()="English (US)"][1]')
|
||||||
sleep(1, 2)
|
|
||||||
page.click('//div[@class="x1y1aw1k x4uap5 xwxc41k xkhd6sd"]/div/div[2]')
|
|
||||||
sleep(1, 2)
|
|
||||||
page.click('//span[@class="x1lliihq x6ikm8r x10wlt62 x1n2onr6 xlyipyv xuxw1ft" and text()="English (US)"][1]')
|
|
||||||
sleep(3, 5)
|
sleep(3, 5)
|
||||||
page.wait_for_load_state()
|
|
||||||
|
|
||||||
|
|
||||||
def _edit_privacy(page):
|
def _edit_privacy(page):
|
||||||
@@ -785,29 +799,10 @@ def parse_cookies(cookies):
|
|||||||
def check_freeze_account(uid):
|
def check_freeze_account(uid):
|
||||||
# 检查是否冻结
|
# 检查是否冻结
|
||||||
headers = {
|
headers = {
|
||||||
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
|
|
||||||
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
|
||||||
'cache-control': 'max-age=0',
|
|
||||||
'dpr': '2',
|
|
||||||
'priority': 'u=0, i',
|
|
||||||
'referer': 'https://www.facebook.com/',
|
|
||||||
'sec-ch-prefers-color-scheme': 'light',
|
|
||||||
'sec-ch-ua': '"Chromium";v="134", "Not:A-Brand";v="24", "Google Chrome";v="134"',
|
|
||||||
'sec-ch-ua-full-version-list': '"Chromium";v="134.0.6998.89", "Not:A-Brand";v="24.0.0.0", "Google Chrome";v="134.0.6998.89"',
|
|
||||||
'sec-ch-ua-mobile': '?0',
|
|
||||||
'sec-ch-ua-model': '""',
|
|
||||||
'sec-ch-ua-platform': '"macOS"',
|
|
||||||
'sec-ch-ua-platform-version': '"15.3.2"',
|
|
||||||
'sec-fetch-dest': 'document',
|
|
||||||
'sec-fetch-mode': 'navigate',
|
|
||||||
'sec-fetch-site': 'same-origin',
|
|
||||||
'sec-fetch-user': '?1',
|
|
||||||
'upgrade-insecure-requests': '1',
|
|
||||||
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
|
||||||
'viewport-width': '743',
|
|
||||||
}
|
}
|
||||||
url = f"https://graph.facebook.com/{uid}/picture?type=normal"
|
url = f"https://graph.facebook.com/{uid}/picture?type=normal"
|
||||||
response = requests.get(url, headers=headers, allow_redirects=False)
|
response = requests.get(url, headers=headers, allow_redirects=False, verify=False)
|
||||||
if response.status_code == 302:
|
if response.status_code == 302:
|
||||||
if response.headers.get('Location') == 'https://static.xx.fbcdn.net/rsrc.php/v1/yh/r/C5yt7Cqf3zU.jpg':
|
if response.headers.get('Location') == 'https://static.xx.fbcdn.net/rsrc.php/v1/yh/r/C5yt7Cqf3zU.jpg':
|
||||||
raise AuthException('该账号已被冻结', 'frozen')
|
raise AuthException('该账号已被冻结', 'frozen')
|
||||||
@@ -1169,8 +1164,51 @@ def get_login_continue_btn(page):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def retry_goto(page: "Page", url: str, max_retries: int = 3, retry_delay: int = 5):
    """
    Navigate ``page`` to ``url``, retrying when the navigation times out.

    Every attempt calls ``page.goto`` with a 30 second timeout. Only timeouts
    are retried; any other error aborts immediately.

    Args:
        page: The synchronous Playwright Page object.
        url: The URL to navigate to.
        max_retries: Maximum number of attempts (including the initial
            attempt). Values below 1 are treated as 1 so the page is always
            navigated at least once.
        retry_delay: Delay in seconds between attempts.

    Raises:
        TimeoutError: The ``TimeoutError`` imported by this module, re-raised
            when the final attempt also times out.
        Exception: Any other error raised by ``page.goto`` is logged and
            propagated without retrying.
    """
    # A non-positive max_retries used to skip the loop entirely and return
    # without ever loading the page; always make at least one attempt.
    attempts = max(1, max_retries)

    for attempt in range(1, attempts + 1):
        if attempt == 1:
            logger.info(f"Navigating to {url}, initial attempt...")
        else:
            logger.info(f"Retrying navigation to {url}, attempt {attempt}/{attempts}...")

        try:
            page.goto(url, timeout=30000)  # timeout is in milliseconds
        except TimeoutError:
            logger.warning(f"Navigation to {url} timed out on attempt {attempt}.")
            if attempt == attempts:
                logger.error(f"All {attempts} attempts failed for {url}.")
                # Bare raise keeps the original traceback for the caller.
                raise
            logger.info(f"Waiting {retry_delay} seconds before retrying...")
            time.sleep(retry_delay)
        except Exception as e:
            # Non-timeout failures are not retried.
            logger.error(f"An unexpected error occurred during navigation to {url} on attempt {attempt}: {e}")
            raise
        else:
            logger.info(f"Successfully navigated to {url} on attempt {attempt}")
            return
|
||||||
|
|
||||||
|
|
||||||
def playwright_login(username, password, code_2fa=None):
|
def playwright_login(username, password, code_2fa=None):
|
||||||
error_logger.info(f"登录账号{username}")
|
logger.info(f"登录账号{username}")
|
||||||
# 检查是否冻结
|
# 检查是否冻结
|
||||||
check_freeze_account(username)
|
check_freeze_account(username)
|
||||||
|
|
||||||
@@ -1181,20 +1219,9 @@ def playwright_login(username, password, code_2fa=None):
|
|||||||
browser = playwright.chromium.launch(
|
browser = playwright.chromium.launch(
|
||||||
headless=False, args=['--start-maximized'], executable_path=path
|
headless=False, args=['--start-maximized'], executable_path=path
|
||||||
)
|
)
|
||||||
context = browser.new_context(no_viewport=True,
|
random_user_agent = UserAgent().getBrowser("Chrome").get("useragent")
|
||||||
user_agent='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36')
|
logger.info(f"使用ua={random_user_agent}")
|
||||||
page = context.new_page()
|
context = browser.new_context(no_viewport=True, user_agent=random_user_agent)
|
||||||
url = 'https://www.facebook.com'
|
|
||||||
page.goto(url)
|
|
||||||
time.sleep(random.randint(1, 10))
|
|
||||||
page.locator('//input[@id="email"]').type(username, delay=30)
|
|
||||||
time.sleep(random.randint(1, 3))
|
|
||||||
page.locator('//input[@id="pass"]').type(password, delay=30)
|
|
||||||
time.sleep(random.randint(1, 3))
|
|
||||||
page.click('//button[@name="login"]')
|
|
||||||
|
|
||||||
page.wait_for_load_state()
|
|
||||||
time.sleep(random.randint(3, 5))
|
|
||||||
# 设置语言为英文
|
# 设置语言为英文
|
||||||
context.add_cookies([
|
context.add_cookies([
|
||||||
{
|
{
|
||||||
@@ -1207,13 +1234,29 @@ def playwright_login(username, password, code_2fa=None):
|
|||||||
"secure": False,
|
"secure": False,
|
||||||
},
|
},
|
||||||
])
|
])
|
||||||
|
page = context.new_page()
|
||||||
|
url = 'https://www.facebook.com'
|
||||||
|
retry_goto(page, url)
|
||||||
|
page.locator('//input[@id="email"]').type(username, delay=30)
|
||||||
time.sleep(random.randint(1, 3))
|
time.sleep(random.randint(1, 3))
|
||||||
page.reload()
|
page.locator('//input[@id="pass"]').type(password, delay=30)
|
||||||
time.sleep(random.randint(1, 3))
|
time.sleep(random.randint(1, 3))
|
||||||
|
page.click('//button[@name="login"]')
|
||||||
|
page.wait_for_load_state()
|
||||||
|
time.sleep(random.randint(3, 5))
|
||||||
|
arkose_captcha = page.query_selector('#arkose-captcha')
|
||||||
|
if arkose_captcha:
|
||||||
|
logger.info(f"账号{username} 弹语音识别验证")
|
||||||
|
raise OperationFailed("操作失败")
|
||||||
|
|
||||||
|
arkose_captcha = page.query_selector('#captcha-recaptcha')
|
||||||
|
if arkose_captcha:
|
||||||
|
logger.info(f"账号{username} 弹谷歌验证")
|
||||||
|
raise OperationFailed("操作失败")
|
||||||
|
|
||||||
captcha_img = page.query_selector('//img[contains(@src, "captcha")]')
|
captcha_img = page.query_selector('//img[contains(@src, "captcha")]')
|
||||||
if captcha_img:
|
if captcha_img:
|
||||||
error_logger.info(f"账号{username} 需要验证")
|
logger.info(f"账号{username} 需要验证")
|
||||||
data = {
|
data = {
|
||||||
'user': 'ycxxkj',
|
'user': 'ycxxkj',
|
||||||
'pass2': 'B4DBF06831577C6558F823879061626C',
|
'pass2': 'B4DBF06831577C6558F823879061626C',
|
||||||
@@ -1229,10 +1272,12 @@ def playwright_login(username, password, code_2fa=None):
|
|||||||
page.locator('//img[contains(@src, "captcha")]/parent::div/parent::div/div').nth(4).click()
|
page.locator('//img[contains(@src, "captcha")]/parent::div/parent::div/div').nth(4).click()
|
||||||
else:
|
else:
|
||||||
raise OperationFailed('验证码解析错误')
|
raise OperationFailed('验证码解析错误')
|
||||||
time.sleep(3)
|
# 检查是否还有验证码, 隐式等待60秒
|
||||||
page.wait_for_load_state()
|
page.wait_for_selector(
|
||||||
# 检查是否还有验证码
|
'//span[@class="x1lliihq x1plvlek xryxfnj x1n2onr6 x1ji0vk5 x18bv5gf x193iq5w xeuugli x1fj9vlw x13faqbe x1vvkbs x1s928wv xhkezso x1gmr53x x1cpjm7i x1fgarty x1943h6x x1qo61fq x81x36d xa4e6wy x1rhavg7 xzsf02u x1yc453h xudqn12 x3x7a5m x1yztbdb"]',
|
||||||
h2 = page.query_selector("//h2/span")
|
timeout=60000)
|
||||||
|
h2 = page.wait_for_selector(
|
||||||
|
'//div[@class="x1n2onr6 x1ja2u2z x9f619 x78zum5 xdt5ytf x2lah0s x193iq5w"]//h2/span', timeout=60000)
|
||||||
if h2 is None:
|
if h2 is None:
|
||||||
raise OperationFailed('页面有误')
|
raise OperationFailed('页面有误')
|
||||||
else:
|
else:
|
||||||
@@ -1242,7 +1287,7 @@ def playwright_login(username, password, code_2fa=None):
|
|||||||
]
|
]
|
||||||
|
|
||||||
if not h2.text_content() in text_contexts:
|
if not h2.text_content() in text_contexts:
|
||||||
error_logger.info(f"账号{username} 操作失败")
|
logger.info(f"账号{username} 操作失败")
|
||||||
raise OperationFailed("操作失败")
|
raise OperationFailed("操作失败")
|
||||||
auth_span = page.query_selector('//span[text()="Try Another Way" or text()="Try another way"]')
|
auth_span = page.query_selector('//span[text()="Try Another Way" or text()="Try another way"]')
|
||||||
if auth_span:
|
if auth_span:
|
||||||
@@ -1261,19 +1306,15 @@ def playwright_login(username, password, code_2fa=None):
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
page.locator('//label[text()="Code"]/preceding-sibling::input').fill(auth_code)
|
page.locator('//label[text()="Code"]/preceding-sibling::input').fill(auth_code)
|
||||||
page.click('//span[text()="Continue"]')
|
page.click('//span[text()="Continue"]')
|
||||||
# 这里验证可能会很慢, 硬等
|
# 等待登录成功页面出来
|
||||||
time.sleep(40)
|
page.wait_for_selector(
|
||||||
save_profile = page.query_selector('//span[text()="Save"]')
|
"xpath=//h2[normalize-space()='You’re logged in. Trust this device?'] or //span[text()='Save']",
|
||||||
if save_profile:
|
timeout=60000)
|
||||||
save_profile.click()
|
|
||||||
trust_device_select = page.query_selector('''//span[text()="Always confirm that it's me"]''')
|
|
||||||
if trust_device_select:
|
|
||||||
trust_device_select.click()
|
|
||||||
|
|
||||||
time.sleep(3)
|
|
||||||
c = {i['name']: i['value'] for i in context.cookies()}
|
c = {i['name']: i['value'] for i in context.cookies()}
|
||||||
if c["c_user"] is None:
|
if c["c_user"] is None:
|
||||||
raise OperationFailed("操作失败")
|
raise OperationFailed("操作失败")
|
||||||
|
logger.info(f"登录账号{username} 登录成功")
|
||||||
|
|
||||||
context.close()
|
context.close()
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|||||||
Reference in New Issue
Block a user