diff --git a/main.py b/main.py index 13ff7c8..f1b4f88 100644 --- a/main.py +++ b/main.py @@ -74,41 +74,17 @@ def execute_task(tid, task_type, **kwargs): def main(): - # 创建任务队列,最大容量为3 - task_queue = Queue(maxsize=2) - # 存储正在运行的任务 - running_tasks = set() - - # 创建线程池执行任务(2个工作线程) - with ThreadPoolExecutor(max_workers=2) as executor: - while True: - try: - # 清理已完成的任务 - running_tasks = {task for task in running_tasks if not task.done()} - - # 如果队列未满,尝试获取新任务 - if len(running_tasks) < 2: - task = get_task() - if task: - logger.info(f"收到任务: {task['id']} - {task['task_type']}") - task_data = task['data'] - task_data['tid'] = task['id'] - task_data['task_type'] = task['task_type'] - # 提交任务到线程池并保存future对象 - future = executor.submit(execute_task, **task_data) - running_tasks.add(future) - else: - logger.info("无更多任务") - time.sleep(10) - else: - # 达到最大并发数时等待 - logger.info("等待任务完成") - time.sleep(1) - - except Exception as e: - logger.error(f'Main Error: {e}') + while True: + try: + task = get_task() + if task is None: time.sleep(10) - + continue + task['data']['tid'] = task['id'] + task['data']['task_type'] = task['task_type'] + execute_task(**task['data']) + except Exception as e: + time.sleep(10) if __name__ == '__main__': main() diff --git a/spider/task.py b/spider/task.py index 8fb0714..c2d6803 100644 --- a/spider/task.py +++ b/spider/task.py @@ -25,7 +25,6 @@ from exceptions import AuthException, OperationFailed from miniofile import client, put_object - def sleep(a, b=None): if not b: return time.sleep(a) @@ -276,18 +275,19 @@ def retry_get_new_video(page, cookies, post_count): # 如果新帖子数量大于初始帖子数量,则表示有新帖子上传 if new_post_count > post_count: try: - # 尝试点击评论按钮 + # 尝试点击视频 comment_buttons = page.query_selector_all( '//a[@aria-label="Enlarge"]') if comment_buttons: - comment_buttons[0].hover() - comment_buttons[0].click() + # 使用js去点击第一个评论按钮 + element = page.query_selector_all('//a[@aria-label="Enlarge"]')[0] + page.evaluate('(element) => element.click()', element) time.sleep(random.randint(3, 5)) page.reload() time.sleep(random.randint(3, 5)) return page.url except Exception as e: - raise OperationFailed(f"点击评论按钮时出错: {e}") + raise OperationFailed(f"点击视频时出错: {e}") # 计算下一次重试时间 elapsed = time.time() - start_time