当前位置: 首页 > news >正文

深入解析:Playwright同步、异步、并行、串行执行效率比较

一、源码展示

1.1串行同步.py

import time
from playwright.sync_api import sync_playwright

URL = "https://www.baidu.com"


def do_one(page, keyword, is_first_run):
    """Run one Baidu search on ``page`` and return how long it took.

    Args:
        page: Playwright sync ``Page`` that is driven for the search.
        keyword: Text typed into the ``#chat-textarea`` search box.
        is_first_run: When true the home page is loaded with ``goto``;
            otherwise the already-open page is sent back to the home page
            through its "到百度首页" link.

    Returns:
        Elapsed seconds (``time.perf_counter`` based) from entering the
        function until ``#content_left`` is found.
    """
    t0 = time.perf_counter()

    if is_first_run:
        # DOMContentLoaded is enough; no need to wait for every resource.
        page.goto(URL, wait_until='domcontentloaded')
    else:
        # Reuse the open page: go back to the home page via its link and
        # wait for the search box instead of issuing a fresh goto.
        page.get_by_role("link", name="到百度首页").click()
        page.wait_for_selector('#chat-textarea', timeout=10000)

    page.fill('#chat-textarea', keyword)

    # Register the navigation wait before clicking so the navigation that
    # the click triggers cannot be missed.
    with page.expect_navigation(wait_until='domcontentloaded'):
        page.click('#chat-submit-button')

    # Wait for the results container rather than for network idle.
    page.wait_for_selector('#content_left', timeout=10000)

    return time.perf_counter() - t0


def main():
    """Run two searches one after another with the sync API.

    Returns:
        Tuple ``(launch_cost, total_biz, wall_clock)`` in seconds:
        the time spent in ``chromium.launch``, the sum of both search
        durations, and the elapsed time of the whole function.
    """
    t_all = time.perf_counter()

    with sync_playwright() as p:
        t_launch = time.perf_counter()
        browser = p.chromium.launch(
            headless=True,
            args=[
                '--disable-gpu',
                '--disable-extensions',
                '--no-sandbox',
                '--disable-dev-shm-usage',
            ],
        )
        launch_cost = time.perf_counter() - t_launch

        try:
            page = browser.new_page()
            # Strictly serial: the second search reuses the same page.
            jay_cost = do_one(page, "周杰伦", is_first_run=True)
            kun_cost = do_one(page, "蔡徐坤", is_first_run=False)
        finally:
            # Close the browser even when a search step raises.
            browser.close()

    total_biz = jay_cost + kun_cost
    wall_clock = time.perf_counter() - t_all
    return launch_cost, total_biz, wall_clock


if __name__ == '__main__':
    launch_cost, total_biz, wall_clock = main()
    # Report the three figures separately: wall_clock already contains the
    # launch and search time, so adding them up would double-count.
    print(f"launch={launch_cost:.3f}s biz={total_biz:.3f}s wall={wall_clock:.3f}s")

1.2串行异步.py

import time
import asyncio
from playwright.async_api import async_playwright

URL = "https://www.baidu.com"


async def do_one(page, keyword, is_first_run=None):
    """Run one Baidu search on ``page`` and return how long it took.

    Whether the page is fresh is tracked on the Python side. A marker stored
    on the page's ``window`` is discarded whenever the search submit loads a
    new document, so it cannot reliably identify a reused page.

    Args:
        page: Playwright async ``Page`` that is driven for the search.
        keyword: Text typed into the ``#chat-textarea`` search box.
        is_first_run: ``True`` to load ``URL`` with ``goto``; ``False`` to
            return to the home page through the "到百度首页" link. When left
            as ``None`` it is inferred: a page whose URL is still
            ``about:blank`` is treated as a first run.

    Returns:
        Elapsed seconds (``time.perf_counter`` based) from entering the
        function until ``#content_left`` is found.
    """
    t0 = time.perf_counter()

    if is_first_run is None:
        # NOTE(review): relies on a never-navigated page reporting
        # "about:blank" as its URL - confirm against the Playwright docs.
        is_first_run = page.url == "about:blank"

    if is_first_run:
        # DOMContentLoaded is enough for the search box to be usable.
        await page.goto(URL, wait_until='domcontentloaded')
    else:
        # Same reuse path as the serial sync script: go back to the home
        # page via its link and wait for the search box.
        await page.get_by_role("link", name="到百度首页").click()
        await page.wait_for_selector('#chat-textarea', timeout=10000)

    await page.fill('#chat-textarea', keyword)

    # Register the navigation wait before clicking so the navigation that
    # the click triggers cannot be missed.
    async with page.expect_navigation(wait_until='domcontentloaded'):
        await page.click('#chat-submit-button')

    await page.wait_for_selector('#content_left', timeout=10000)

    return time.perf_counter() - t0


async def main():
    """Run two searches one after another with the async API.

    Returns:
        Tuple ``(launch_cost, total_biz, wall_clock)`` in seconds:
        the time spent in ``chromium.launch``, the sum of both search
        durations, and the measured elapsed time of the whole coroutine.
    """
    # Measure real elapsed time instead of deriving it from the parts, so
    # page creation and shutdown are included as well.
    t_all = time.perf_counter()

    async with async_playwright() as p:
        t_launch_0 = time.perf_counter()
        browser = await p.chromium.launch(
            headless=True,
            args=[
                '--disable-gpu',
                '--disable-extensions',
                '--no-sandbox',
                '--disable-dev-shm-usage',
            ],
        )
        launch_cost = time.perf_counter() - t_launch_0

        try:
            page = await browser.new_page()
            # Strictly serial: each search is awaited before the next starts.
            jay_cost = await do_one(page, "周杰伦", is_first_run=True)
            kun_cost = await do_one(page, "蔡徐坤", is_first_run=False)
        finally:
            # Close the browser even when a search step raises.
            await browser.close()

    total_biz = jay_cost + kun_cost
    wall_clock = time.perf_counter() - t_all
    return launch_cost, total_biz, wall_clock


if __name__ == '__main__':
    launch_cost, total_biz, wall_clock = asyncio.run(main())
    # Report the three figures separately: wall_clock already contains the
    # launch and search time, so adding them up would double-count.
    print(f"launch={launch_cost:.3f}s biz={total_biz:.3f}s wall={wall_clock:.3f}s")

1.3并行同步.py

import time
from playwright.sync_api import sync_playwright
from concurrent.futures import ThreadPoolExecutor
import threading

URL = "https://www.baidu.com"

# Thread-local storage keeps each worker thread's Playwright instance separate.
thread_local = threading.local()


def init_playwright():
    """Return the calling thread's Playwright instance, starting it if needed."""
    if not hasattr(thread_local, 'playwright'):
        thread_local.playwright = sync_playwright().start()
    return thread_local.playwright


def _stop_playwright():
    """Stop and forget the calling thread's Playwright instance, if any."""
    playwright = getattr(thread_local, 'playwright', None)
    if playwright is not None:
        del thread_local.playwright
        playwright.stop()


def do_work(keyword):
    """Launch a browser in the current thread and run one Baidu search.

    Args:
        keyword: Text typed into the ``#chat-textarea`` search box.

    Returns:
        Tuple ``(launch_cost, biz_cost)`` in seconds: the time spent in
        ``chromium.launch`` and the time from ``goto`` until
        ``#content_left`` is found.
    """
    p = init_playwright()
    try:
        t_launch = time.perf_counter()
        browser = p.chromium.launch(
            headless=True,
            args=[
                '--disable-gpu',
                '--disable-extensions',
                '--no-sandbox',
                '--disable-setuid-sandbox',
                '--disable-dev-shm-usage',
            ],
        )
        launch_cost = time.perf_counter() - t_launch

        try:
            page = browser.new_page()
            t0 = time.perf_counter()

            page.goto(URL, wait_until='domcontentloaded')
            page.locator('#chat-textarea').fill(keyword)

            # Register the navigation wait before clicking so the navigation
            # that the click triggers cannot be missed.
            with page.expect_navigation(wait_until='domcontentloaded'):
                page.click('#chat-submit-button')

            page.wait_for_selector('#content_left', timeout=10000)
            biz_cost = time.perf_counter() - t0
        finally:
            # Close the browser even when a search step raises.
            browser.close()
    finally:
        # Clean up here, in the worker thread itself: thread_local attributes
        # set by a worker are not visible from the main thread, so a cleanup
        # call made there would find nothing to stop.
        _stop_playwright()

    return launch_cost, biz_cost


def main():
    """Run two searches in parallel, one browser per worker thread.

    Returns:
        Tuple ``(launch_cost, total_biz, wall_clock)`` in seconds.
        ``launch_cost`` is the sum of both threads' launch times (the
        launches overlap, so it can exceed the elapsed launch time),
        ``total_biz`` is the slower of the two searches, and ``wall_clock``
        is the measured elapsed time of the whole function.
    """
    t_all = time.perf_counter()

    with ThreadPoolExecutor(max_workers=2) as pool:
        results = list(pool.map(do_work, ["周杰伦", "蔡徐坤"]))

    (launch1, biz1), (launch2, biz2) = results
    launch_cost = launch1 + launch2
    total_biz = max(biz1, biz2)
    wall_clock = time.perf_counter() - t_all
    return launch_cost, total_biz, wall_clock


if __name__ == '__main__':
    launch_cost, total_biz, wall_clock = main()
    # Report the three figures separately: wall_clock already contains the
    # launch and search time, so adding them up would double-count.
    print(f"launch={launch_cost:.3f}s biz={total_biz:.3f}s wall={wall_clock:.3f}s")

1.4并行异步.py

import time
import asyncio
from playwright.async_api import async_playwright

URL = "https://www.baidu.com"


async def do_one(page, keyword):
    """Load the Baidu home page on ``page``, search, and time the whole run.

    Args:
        page: Playwright async ``Page`` that is driven for the search.
        keyword: Text typed into the ``#chat-textarea`` search box.

    Returns:
        Elapsed seconds (``time.perf_counter`` based) from entering the
        function until ``#content_left`` is found.
    """
    t0 = time.perf_counter()

    # DOMContentLoaded is enough for the search box to be usable.
    await page.goto(URL, wait_until='domcontentloaded')
    await page.locator('#chat-textarea').fill(keyword)

    # Register the navigation wait before clicking so the navigation that
    # the click triggers cannot be missed.
    async with page.expect_navigation(wait_until='domcontentloaded'):
        await page.click('#chat-submit-button')

    # Wait for the results container rather than for network idle.
    await page.wait_for_selector('#content_left')

    return time.perf_counter() - t0


async def main():
    """Run two searches concurrently on two pages of one browser context.

    Returns:
        Tuple ``(launch_cost, total_biz, wall_clock)`` in seconds:
        the time spent in ``chromium.launch``, the slower of the two
        searches, and the measured elapsed time of the whole coroutine.
    """
    # Measure real elapsed time instead of deriving it from the parts, so
    # context/page creation and shutdown are included as well.
    t_all = time.perf_counter()

    async with async_playwright() as p:
        t_launch_0 = time.perf_counter()
        browser = await p.chromium.launch(
            headless=True,
            args=[
                '--disable-gpu',
                '--disable-extensions',
                '--disable-dev-shm-usage',
                '--no-sandbox',
                '--disable-setuid-sandbox',
            ],
        )
        launch_cost = time.perf_counter() - t_launch_0

        try:
            # Both pages share a single context.
            context = await browser.new_context()
            page1 = await context.new_page()
            page2 = await context.new_page()

            # Both searches are scheduled together and awaited as a group.
            biz_costs = await asyncio.gather(
                do_one(page1, "周杰伦"),
                do_one(page2, "蔡徐坤"),
            )
        finally:
            # Close the browser even when a search step raises.
            await browser.close()

    total_biz = max(biz_costs)
    wall_clock = time.perf_counter() - t_all
    return launch_cost, total_biz, wall_clock


if __name__ == '__main__':
    launch_cost, total_biz, wall_clock = asyncio.run(main())
    # Report the three figures separately: wall_clock already contains the
    # launch and search time, so adding them up would double-count.
    print(f"launch={launch_cost:.3f}s biz={total_biz:.3f}s wall={wall_clock:.3f}s")

1.5 __init__.py

from .串行同步 import main as one
from .串行异步 import main as two
from .并行同步 import main as three
from .并行异步 import main as four

1.6compare.py

import asyncio
import matplotlib.pyplot as plt
from 同步异步 import one, two, three, four


def out_pic():
    """Run all four variants once and plot their timings as bar charts.

    Each variant returns ``(launch_cost, total_biz, wall_clock)``; the three
    figures are drawn as three side-by-side subplots, one bar per variant.
    The figure is shown with ``plt.show()``; nothing is returned.
    """
    # Use a CJK-capable font and keep the minus sign renderable.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    labels = ['串行同步', '串行异步', '并行同步', '并行异步']

    # The async variants are coroutines, so each gets its own event loop run.
    runners = (
        one,
        lambda: asyncio.run(two()),
        three,
        lambda: asyncio.run(four()),
    )
    measurements = [run() for run in runners]
    launch_costs = [launch for launch, _, _ in measurements]
    total_biz = [biz for _, biz, _ in measurements]
    wall_clocks = [wall for _, _, wall in measurements]

    # One figure, three subplots with independent y axes.
    fig, axes = plt.subplots(1, 3, figsize=(15, 4), sharey=False)
    panels = (
        (launch_costs, '浏览器冷启动时间', 'skyblue'),
        (total_biz, '业务耗时对比', 'orange'),
        (wall_clocks, '总耗时对比', 'lightgreen'),
    )
    tick_positions = range(len(labels))
    for ax, (values, title, color) in zip(axes, panels):
        ax.bar(labels, values, color=color)
        ax.set_title(title)
        ax.set_ylabel('秒')
        # Fix the tick positions first, then attach the rotated labels.
        ax.set_xticks(tick_positions)
        ax.set_xticklabels(labels, rotation=15, ha='right')

    plt.tight_layout()
    plt.show()


if __name__ == '__main__':
    out_pic()

二、目录结构

三、运行结果

四、结论

理论上说，并行异步运行最快；实际运行结果可能有些许误差。

http://www.hskmm.com/?act=detail&tid=24047

相关文章:

  • 2025十一集训——Day2模拟赛
  • 2025十一集训——Day模拟赛
  • Qt纯代码实现智能安防集中管理平台/楼宇对讲管理系统/门禁管理/视频监控
  • 汉文博士词典库源文件已在 github 开放
  • 读人形机器人30未来20年
  • Flutter + Ollama:开启本地AI的全平台新纪元 —— 从零剖析一款现代化AI客户端的技能奥秘
  • 股票资料API接口全解析:从技术原理到多语言实战(含实时行情、MACD、KDJ等技术指标数据与API文档详解)
  • 产业园区招商团队快躺平了 - 智慧园区
  • 洛谷 P3545
  • 题解:AT_wtf22_day2_b The Greatest Two
  • 威胁狩猎实战:终端攻击行为分析与检测
  • 实用指南:基于Hadoop+Spark的人体体能数据分析与可视化系统开源实现
  • 英语_阅读_Water Sliding_待读
  • 实用指南:ArcGIS JSAPI 高级教程 - 高亮效果优化之开启使用多高亮样式
  • const在for用不了
  • about me
  • 10月北京中学集训随笔
  • 使用100%缩放比例重新启动Visual Studio 界面模糊的解决方案
  • 某工程师入职华为,职级比较高,但还看不懂代码,有点尴尬
  • 使用Silobase在几分钟内快速部署后端API
  • 【光照】[各向异性]在UnityURP中的实现
  • 基于HAL库和中断的LED流水灯
  • 从衡阳麻衣事件到AI元人文:用户端元人文实践的进化路径研究——声明ai研究
  • 5_flutter UI框架选型
  • 4_查询flutter版本信息
  • 3_flutter简单教程
  • 如何给 Claude 中的网页做截图
  • 2_gradle配置加速
  • AI元人文:岐金兰《悬鉴》起源
  • 九月回忆