Playwright fails when running in a Docker container

  Kiến thức lập trình

I have a simple FastAPI app in which I use Playwright to scrape the internet.

It runs perfectly locally, but when I run it in the container it fails.

I am using the official playwright image

mcr.microsoft.com/playwright/python:v1.44.0-focal

with the following code

import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import httpx
import json
import os

async def call_internet_search(query):
    """Search Bing for *query* and scrape the text of each hit with Playwright.

    The Bing Web Search endpoint is queried for up to 5 results, then every
    result URL is loaded concurrently in one shared headless Chromium
    context via ``scrape_website_playwright``.

    Args:
        query: The search string forwarded to Bing as the ``q`` parameter.

    Returns:
        A JSON string encoding a list of the dicts produced by
        ``scrape_website_playwright`` (one per search hit), or ``"[]"`` when
        the response carries no web results.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a non-success
        HTTP status, and any error from launching the browser or creating
        the context. Per-page failures are reported inside the result dicts
        rather than raised.
    """
    print("USING THE INTERNET!")
    print(query)
    search_url = "https://api.bing.microsoft.com/v7.0/search"
    headers = {"Ocp-Apim-Subscription-Key": os.getenv("REALEG_BING_KEY")}
    params = {"q": query, "count": 5, "textDecorations": True, "textFormat": "Raw"}

    async with httpx.AsyncClient() as client:
        response = await client.get(search_url, headers=headers, params=params)
        response.raise_for_status()
        search_results = response.json()

    # The "webPages" section may be absent when the query has no web hits
    # (see the Bing response reference); treat that as an empty result set
    # instead of failing with KeyError, and skip the browser launch entirely.
    web_pages = search_results.get("webPages", {}).get("value", [])
    if not web_pages:
        return json.dumps([])

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            context = await browser.new_context(
                bypass_csp=True,
                java_script_enabled=True,
                ignore_https_errors=True)

            # Run the scrape_website_playwright coroutines in parallel
            tasks = [
                scrape_website_playwright(context, result["url"], i + 1)
                for i, result in enumerate(web_pages)
            ]
            top_results = await asyncio.gather(*tasks)
        finally:
            # Close the browser even if context creation or scraping raised.
            await browser.close()
    return json.dumps(top_results)

async def scrape_website_playwright(context, url, index):
    """Load *url* in a new page of *context* and return its visible text.

    Args:
        context: Object exposing an awaitable ``new_page()``; in this file it
            is the Playwright browser context shared by all scrape tasks.
        url: Address to navigate to.
        index: Caller-supplied position of this result, echoed back in the
            returned dict.

    Returns:
        A dict with keys ``"index"``, ``"url"`` and ``"snippet"``. On success
        ``"snippet"`` is the page text extracted by BeautifulSoup; on any
        ``Exception`` it is ``"Failed to load content: <error>"``. The
        function does not raise for ordinary errors so that one bad page
        cannot abort sibling tasks gathered alongside it.
    """
    page = None
    try:
        page = await context.new_page()
        await page.goto(url)
        content = await page.content()
        soup = BeautifulSoup(content, 'html.parser')
        text_content = soup.get_text(separator=' ', strip=True)
        return {
            "index": index,
            "url": url,
            "snippet": text_content,
        }
    except Exception as e:
        return {
            "index": index,
            "url": url,
            "snippet": f"Failed to load content: {str(e)}",
        }
    finally:
        # Always release the page: previously a failed goto()/content() left
        # the page open in the shared context.
        if page is not None:
            try:
                await page.close()
            except Exception:
                # Best-effort cleanup: the context or browser may already be
                # gone, and the result above must still be returned.
                pass

and here are my logs:

2024-07-11 19:39:34 INFO:     192.168.65.1:48202 - "POST /chats/1706716117109x368349438951905700?is_test=false HTTP/1.1" 500 Internal Server Error
2024-07-11 19:39:34 ERROR:    Exception in ASGI application
2024-07-11 19:39:34 Traceback (most recent call last):
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/uvicorn/protocols/http/httptools_impl.py", line 399, in run_asgi
2024-07-11 19:39:34     result = await app(  # type: ignore[func-returns-value]
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/uvicorn/middleware/proxy_headers.py", line 70, in __call__
2024-07-11 19:39:34     return await self.app(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/fastapi/applications.py", line 1054, in __call__
2024-07-11 19:39:34     await super().__call__(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/applications.py", line 123, in __call__
2024-07-11 19:39:34     await self.middleware_stack(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/middleware/errors.py", line 186, in __call__
2024-07-11 19:39:34     raise exc
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/middleware/errors.py", line 164, in __call__
2024-07-11 19:39:34     await self.app(scope, receive, _send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/middleware/exceptions.py", line 65, in __call__
2024-07-11 19:39:34     await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
2024-07-11 19:39:34     raise exc
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
2024-07-11 19:39:34     await app(scope, receive, sender)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/routing.py", line 756, in __call__
2024-07-11 19:39:34     await self.middleware_stack(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/routing.py", line 776, in app
2024-07-11 19:39:34     await route.handle(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/routing.py", line 297, in handle
2024-07-11 19:39:34     await self.app(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/routing.py", line 77, in app
2024-07-11 19:39:34     await wrap_app_handling_exceptions(app, request)(scope, receive, send)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/_exception_handler.py", line 64, in wrapped_app
2024-07-11 19:39:34     raise exc
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/_exception_handler.py", line 53, in wrapped_app
2024-07-11 19:39:34     await app(scope, receive, sender)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/starlette/routing.py", line 72, in app
2024-07-11 19:39:34     response = await func(request)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/fastapi/routing.py", line 278, in app
2024-07-11 19:39:34     raw_response = await run_endpoint_function(
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/fastapi/routing.py", line 191, in run_endpoint_function
2024-07-11 19:39:34     return await dependant.call(**values)
2024-07-11 19:39:34   File "/code/app/main.py", line 84, in read_root
2024-07-11 19:39:34     output=await call_internet_search(json.loads(tool.function.arguments)['query'])
2024-07-11 19:39:34   File "/code/app/utils.py", line 23, in call_internet_search
2024-07-11 19:39:34     context = await browser.new_context(
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/playwright/async_api/_generated.py", line 13460, in new_context
2024-07-11 19:39:34     await self._impl_obj.new_context(
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/playwright/_impl/_browser.py", line 127, in new_context
2024-07-11 19:39:34     channel = await self._channel.send("newContext", params)
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/playwright/_impl/_connection.py", line 59, in send
2024-07-11 19:39:34     return await self._connection.wrap_api_call(
2024-07-11 19:39:34   File "/usr/local/lib/python3.8/dist-packages/playwright/_impl/_connection.py", line 514, in wrap_api_call
2024-07-11 19:39:34     raise rewrite_error(error, f"{parsed_st['apiName']}: {error}") from None
2024-07-11 19:39:34 playwright._impl._errors.TargetClosedError: Browser.new_context: Target page, context or browser has been closed
2024-07-11 19:39:34 Browser logs:
2024-07-11 19:39:34 
2024-07-11 19:39:34 <launching> /ms-playwright/chromium-1117/chrome-linux/chrome --disable-field-trial-config --disable-background-networking --enable-features=NetworkService,NetworkServiceInProcess --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-back-forward-cache --disable-breakpad --disable-client-side-phishing-detection --disable-component-extensions-with-background-pages --disable-component-update --no-default-browser-check --disable-default-apps --disable-dev-shm-usage --disable-extensions --disable-features=ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,DialMediaRouteProvider,AcceptCHFrame,AutoExpandDetailsElement,CertificateTransparencyComponentUpdater,AvoidUnnecessaryBeforeUnloadCheckSync,Translate,HttpsUpgrades,PaintHolding --allow-pre-commit-input --disable-hang-monitor --disable-ipc-flooding-protection --disable-popup-blocking --disable-prompt-on-repost --disable-renderer-backgrounding --force-color-profile=srgb --metrics-recording-only --no-first-run --enable-automation --password-store=basic --use-mock-keychain --no-service-autorun --export-tagged-pdf --disable-search-engine-choice-screen --headless --hide-scrollbars --mute-audio --blink-settings=primaryHoverType=2,availableHoverTypes=2,primaryPointerType=4,availablePointerTypes=4 --no-sandbox --user-data-dir=/tmp/playwright_chromiumdev_profile-XXXXXXgg1KAC --remote-debugging-pipe --no-startup-window
2024-07-11 19:39:34 <launched> pid=268
2024-07-11 19:39:34 [pid=268][err] [0711/163932.708366:FATAL:zygote_main_linux.cc(145)] Check failed: sandbox::ThreadHelpers::IsSingleThreaded(). 
2024-07-11 19:39:34 [pid=268][err] #0 0x55555dea5332 base::debug::CollectStackTrace()
2024-07-11 19:39:34 [pid=268][err] #1 0x55555de929fe base::debug::StackTrace::StackTrace()
2024-07-11 19:39:34 [pid=268][err] #2 0x55555ddeb5e9 logging::LogMessage::Flush()
2024-07-11 19:39:34 [pid=268][err] #3 0x55555ddeb4cd logging::LogMessage::~LogMessage()
2024-07-11 19:39:34 [pid=268][err] #4 0x55555ddd5350 logging::(anonymous namespace)::CheckLogMessage::~CheckLogMessage()
2024-07-11 19:39:34 [pid=268][err] #5 0x55555ddd53ae logging::(anonymous namespace)::CheckLogMessage::~CheckLogMessage()
2024-07-11 19:39:34 [pid=268][err] #6 0x55555ddd5133 logging::CheckError::~CheckError()
2024-07-11 19:39:34 [pid=268][err] #7 0x55555d2dfb77 content::ZygoteMain()
2024-07-11 19:39:34 [pid=268][err] #8 0x55555d2d93d1 content::RunZygote()
2024-07-11 19:39:34 [pid=268][err] #9 0x55555d2da1ae content::RunOtherNamedProcessTypeMain()
2024-07-11 19:39:34 [pid=268][err] #10 0x55555d2db28d content::ContentMainRunnerImpl::Run()
2024-07-11 19:39:34 [pid=268][err] #11 0x55555d2d8c72 content::RunContentProcess()
2024-07-11 19:39:34 [pid=268][err] #12 0x55555d2d8eb7 content::ContentMain()
2024-07-11 19:39:34 [pid=268][err] #13 0x55555d887a3a headless::HeadlessShellMain()
2024-07-11 19:39:34 [pid=268][err] #14 0x55555945929a ChromeMain
2024-07-11 19:39:34 [pid=268][err] #15 0x2aaaac035083 __libc_start_main
2024-07-11 19:39:34 [pid=268][err] #16 0x55555945902a _start
2024-07-11 19:39:34 [pid=268][err] 
2024-07-11 19:39:34 [pid=268][err] [0711/163932.708329:FATAL:zygote_main_linux.cc(145)] Check failed: sandbox::ThreadHelpers::IsSingleThreaded(). 
2024-07-11 19:39:34 [pid=268][err] #0 0x55555dea5332 base::debug::CollectStackTrace()
2024-07-11 19:39:34 [pid=268][err] #1 0x55555de929fe base::debug::StackTrace::StackTrace()
2024-07-11 19:39:34 [pid=268][err] #2 0x55555ddeb5e9 logging::LogMessage::Flush()
2024-07-11 19:39:34 [pid=268][err] #3 0x55555ddeb4cd logging::LogMessage::~LogMessage()
2024-07-11 19:39:34 [pid=268][err] #4 0x55555ddd5350 logging::(anonymous namespace)::CheckLogMessage::~CheckLogMessage()
2024-07-11 19:39:34 [pid=268][err] #5 0x55555ddd53ae logging::(anonymous namespace)::CheckLogMessage::~CheckLogMessage()
2024-07-11 19:39:34 [pid=268][err] #6 0x55555ddd5133 logging::CheckError::~CheckError()
2024-07-11 19:39:34 [pid=268][err] #7 0x55555d2dfb77 content::ZygoteMain()
2024-07-11 19:39:34 [pid=268][err] #8 0x55555d2d93d1 content::RunZygote()
2024-07-11 19:39:34 [pid=268][err] #9 0x55555d2da1ae content::RunOtherNamedProcessTypeMain()
2024-07-11 19:39:34 [pid=268][err] #10 0x55555d2db28d content::ContentMainRunnerImpl::Run()
2024-07-11 19:39:34 [pid=268][err] #11 0x55555d2d8c72 content::RunContentProcess()
2024-07-11 19:39:34 [pid=268][err] #12 0x55555d2d8eb7 content::ContentMain()
2024-07-11 19:39:34 [pid=268][err] #13 0x55555d887a3a headless::HeadlessShellMain()
2024-07-11 19:39:34 [pid=268][err] #14 0x55555945929a ChromeMain
2024-07-11 19:39:34 [pid=268][err] #15 0x2aaaac035083 __libc_start_main
2024-07-11 19:39:34 [pid=268][err] #16 0x55555945902a _start
2024-07-11 19:39:34 [pid=268][err] 
2024-07-11 19:39:34 [pid=268][err] qemu: uncaught target signal 5 (Trace/breakpoint trap) - core dumped
2024-07-11 19:39:34 [pid=268][err] qemu: uncaught target signal 5 (Trace/breakpoint trap) - core dumped
2024-07-11 19:39:34 [pid=268][err] [0711/163932.933039:ERROR:bus.cc(407)] Failed to connect to the bus: Failed to connect to socket /var/run/dbus/system_bus_socket: No such file or directory
2024-07-11 19:39:34 [pid=268][err] [0711/163932.964341:ERROR:bus.cc(407)] Failed to connect to the bus: Failed to connect to socket /var/run/dbus/system_bus_socket: No such file or directory
2024-07-11 19:39:34 [pid=268][err] [0711/163932.965266:ERROR:bus.cc(407)] Failed to connect to the bus: Failed to connect to socket /var/run/dbus/system_bus_socket: No such file or directory
2024-07-11 19:39:34 [pid=268][err] [0711/163933.320372:INFO:config_dir_policy_loader.cc(118)] Skipping mandatory platform policies because no policy file was found at: /etc/chromium/policies/managed
2024-07-11 19:39:34 [pid=268][err] [0711/163933.320690:INFO:config_dir_policy_loader.cc(118)] Skipping recommended platform policies because no policy file was found at: /etc/chromium/policies/recommended
2024-07-11 19:39:34 [pid=268][err] [0711/163933.561331:WARNING:bluez_dbus_manager.cc(248)] Floss manager not present, cannot set Floss enable/disable.
2024-07-11 19:39:34 [pid=268][err] [0711/163933.568294:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.569063:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 1 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.637155:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.637244:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 2 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.651998:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.652063:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 3 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.658765:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.658816:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 4 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.664196:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.664257:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 5 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.669701:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.669844:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 6 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.679507:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.679589:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 7 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.683801:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.683877:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 8 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.687790:ERROR:gpu_process_host.cc(993)] GPU process launch failed: error_code=1002
2024-07-11 19:39:34 [pid=268][err] [0711/163933.687919:WARNING:gpu_process_host.cc(1433)] The GPU process has crashed 9 time(s)
2024-07-11 19:39:34 [pid=268][err] [0711/163933.688217:FATAL:gpu_data_manager_impl_private.cc(449)] GPU process isn't usable. Goodbye.
2024-07-11 19:39:34 [pid=268][err] #0 0x55555dea5332 base::debug::CollectStackTrace()
2024-07-11 19:39:34 [pid=268][err] #1 0x55555de929fe base::debug::StackTrace::StackTrace()
2024-07-11 19:39:34 [pid=268][err] #2 0x55555ddeb5e9 logging::LogMessage::Flush()
2024-07-11 19:39:34 [pid=268][err] #3 0x55555ddec0e9 logging::LogMessageFatal::~LogMessageFatal()
2024-07-11 19:39:34 [pid=268][err] #4 0x55555bde3547 content::(anonymous namespace)::IntentionallyCrashBrowserForUnusableGpuProcess()
2024-07-11 19:39:34 [pid=268][err] #5 0x55555bde0391 content::GpuDataManagerImplPrivate::FallBackToNextGpuMode()
2024-07-11 19:39:34 [pid=268][err] #6 0x55555bddef9b content::GpuDataManagerImpl::FallBackToNextGpuMode()
2024-07-11 19:39:34 [pid=268][err] #7 0x55555bdeb9c9 content::GpuProcessHost::RecordProcessCrash()
2024-07-11 19:39:34 [pid=268][err] #8 0x55555bdecea7 content::GpuProcessHost::OnProcessLaunchFailed()
2024-07-11 19:39:34 [pid=268][err] #9 0x55555bbb2141 content::BrowserChildProcessHostImpl::OnProcessLaunchFailed()
2024-07-11 19:39:34 [pid=268][err] #10 0x55555bc2da0a content::internal::ChildProcessLauncherHelper::PostLaunchOnClientThread()
2024-07-11 19:39:34 [pid=268][err] #11 0x55555bc2dd36 base::internal::Invoker<>::RunOnce()
2024-07-11 19:39:34 [pid=268][err] #12 0x55555de38f9f base::TaskAnnotator::RunTaskImpl()
2024-07-11 19:39:34 [pid=268][err] #13 0x55555de523ad base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::DoWorkImpl()
2024-07-11 19:39:34 [pid=268][err] #14 0x55555de51e02 base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::DoWork()
2024-07-11 19:39:34 [pid=268][err] #15 0x55555de52845 base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::DoWork()
2024-07-11 19:39:34 [pid=268][err] #16 0x55555debb3bc base::MessagePumpGlib::Run()
2024-07-11 19:39:34 [pid=268][err] #17 0x55555de52bd0 base::sequence_manager::internal::ThreadControllerWithMessagePumpImpl::Run()
2024-07-11 19:39:34 [pid=268][err] #18 0x55555de1a871 base::RunLoop::Run()
2024-07-11 19:39:34 [pid=268][err] #19 0x55555bbd4a6a content::BrowserMainLoop::RunMainMessageLoop()
2024-07-11 19:39:34 [pid=268][err] #20 0x55555bbd66d2 content::BrowserMainRunnerImpl::Run()
2024-07-11 19:39:34 [pid=268][err] #21 0x5555645edc9f headless::HeadlessContentMainDelegate::RunProcess()
2024-07-11 19:39:34 [pid=268][err] #22 0x55555d2d9e56 content::RunBrowserProcessMain()
2024-07-11 19:39:34 [pid=268][err] #23 0x55555d2db47c content::ContentMainRunnerImpl::RunBrowser()
2024-07-11 19:39:34 [pid=268][err] #24 0x55555d2db2b1 content::ContentMainRunnerImpl::Run()
2024-07-11 19:39:34 [pid=268][err] #25 0x55555d2d8c72 content::RunContentProcess()
2024-07-11 19:39:34 [pid=268][err] #26 0x55555d2d8eb7 content::ContentMain()
2024-07-11 19:39:34 [pid=268][err] #27 0x55555d887918 headless::HeadlessShellMain()
2024-07-11 19:39:34 [pid=268][err] #28 0x55555945929a ChromeMain
2024-07-11 19:39:34 [pid=268][err] #29 0x2aaaac035083 __libc_start_main
2024-07-11 19:39:34 [pid=268][err] #30 0x55555945902a _start
2024-07-11 19:39:34 [pid=268][err] Task trace:
2024-07-11 19:39:34 [pid=268][err] #0 0x55555bc2d89f content::internal::ChildProcessLauncherHelper::PostLaunchOnLauncherThread()
2024-07-11 19:39:34 [pid=268][err] #1 0x55555bc2cd68 content::internal::ChildProcessLauncherHelper::StartLaunchOnClientThread()
2024-07-11 19:39:34 [pid=268][err] #2 0x55555e558658 mojo::SimpleWatcher::Context::Notify()
2024-07-11 19:39:34 [pid=268][err] #3 0x55555e558658 mojo::SimpleWatcher::Context::Notify()
2024-07-11 19:39:34 [pid=268][err] #4 0x55555e558658 mojo::SimpleWatcher::Context::Notify()
2024-07-11 19:39:34 [pid=268][err] Task trace buffer limit hit, update PendingTask::kTaskBacktraceLength to increase.
2024-07-11 19:39:34 [pid=268][err] 
2024-07-11 19:39:34 [pid=268][err] qemu: uncaught target signal 5 (Trace/breakpoint trap) - core dumped

notes:

I tried disabling the GPU and I tried using a single process, but unfortunately neither of those worked.

Theme wordpress giá rẻ Theme wordpress giá rẻ Thiết kế website

LEAVE A COMMENT