import asyncio
from pathlib import Path
from urllib.parse import urlsplit

from patchright.async_api import Error as PlaywrightError, async_playwright
async def handle_pdf_route(route):
    """Intercept PDF requests and force the browser to download them.

    Requests whose URL path ends in ``.pdf`` (case-insensitive, ignoring any
    query string or fragment) are fetched once, a short summary of the body is
    printed, and the response is passed back to the browser with a
    ``Content-Disposition: attachment`` header so it is saved instead of being
    rendered inline. Every other request continues unmodified.

    Args:
        route: The route object handed to the handler by ``page.route``.
    """
    # Look only at the path so "doc.pdf?v=2" and "DOC.PDF" are matched too.
    url_path = urlsplit(route.request.url).path
    if not url_path.lower().endswith(".pdf"):
        await route.continue_()
        return

    # Fetch exactly once; the same response object is reused for fulfill().
    response = await route.fetch()
    binary_data = await response.body()  # raw bytes of the PDF
    print(f"获取到 PDF 文档,大小: {len(binary_data)} bytes")
    print(f"PDF 文档内容: {binary_data[:100]}...")  # first 100 bytes only

    # Drop any existing Content-Disposition (whatever its casing) so the
    # browser does not receive two conflicting values.
    headers = {
        name: value
        for name, value in response.headers.items()
        if name.lower() != "content-disposition"
    }
    headers['Content-Disposition'] = 'attachment; filename="document.pdf"'
    await route.fulfill(response=response, headers=headers)
async def main():
    """Open the FATF PDF in Chromium and download it via the route handler.

    The route handler is installed before navigation so that the very first
    request (the PDF itself) is intercepted. The function then waits for the
    resulting download to finish before the browser is closed.

    Raises:
        PlaywrightError: If navigation fails and no download is started, or
            if the download itself fails.
    """
    url = "https://www.fatf-gafi.org/content/dam/fatf-gafi/guidance/Second-12-Month-Review-Revised-FATF-Standards-Virtual-Assets-VASPS.pdf"
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)  # headed mode eases debugging
        try:
            context = await browser.new_context(accept_downloads=True)
            page = await context.new_page()

            # Routes only affect requests issued after registration, so the
            # handler must be in place before goto() is called.
            await page.route("**/*", handle_pdf_route)

            nav_error = None
            try:
                async with page.expect_download() as download_info:
                    try:
                        await page.goto(url)
                    except PlaywrightError as exc:
                        # NOTE(review): goto() is expected to raise when the
                        # navigation becomes a download; keep the error so a
                        # real failure can still be reported below.
                        nav_error = exc
                download = await download_info.value
            except PlaywrightError as exc:
                if nav_error is not None:
                    # No download started: surface the navigation failure as
                    # the cause of the download error.
                    raise exc from nav_error
                raise

            # Wait until the file is fully written. Downloaded files are
            # removed when the context closes; call download.save_as(...)
            # here if the file must be kept.
            await download.path()
        finally:
            await browser.close()
# Script entry point: start the async download flow only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())
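# NOTE: The following two lines are residue from the web page this script was
# copied from; they are not part of the program.
# For further actions, you may consider blocking this person and/or reporting abuse
# Top comments (0)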