Search code examples
python, google-chrome-extension, playwright, playwright-python, browser-use

How to load and use an extension within Browser-use?


I'm using browser-use for web automation. This package uses Playwright under the hood. I realized that it is not possible to load an extension in incognito mode, so I must use playwright.chromium.launch_persistent_context instead of playwright.chromium.launch. However, browser-use calls playwright.chromium.launch, so I tried to override the Browser class to change this and load my extension there. With the code I have written so far (below), it hangs and the Chromium instance does not start the way it does in normal mode:

import asyncio
import os

from browser_use import Agent, BrowserConfig, Browser
from browser_use.browser.browser import logger
from langchain_openai import ChatOpenAI
from playwright.async_api import async_playwright, Playwright

# Directory of the unpacked Chrome extension; interpolated into the
# --disable-extensions-except / --load-extension flags further down.
extension_path = "/path/to/capsolver-extension"


class CustomBrowser(Browser):
    # Browser subclass that overrides _setup_browser so that the default
    # (no wss_url, no chrome_instance_path) path launches Chromium with
    # launch_persistent_context instead of launch.
    async def _setup_browser(self, playwright: Playwright):
        """Connect to, or launch, Chromium according to ``self.config``.

        Three paths, checked in order:

        1. ``config.wss_url`` set: connect to that remote endpoint.
        2. ``config.chrome_instance_path`` set: reuse a Chrome already
           listening on ``localhost:9222``, otherwise start that binary with
           remote debugging enabled and connect over CDP.
        3. Otherwise: launch Chromium with a persistent context stored in
           ``./user_data``.

        NOTE(review): paths 1 and 2 return the object produced by
        ``connect`` / ``connect_over_cdp``, while path 3 returns the object
        produced by ``launch_persistent_context`` (a ``BrowserContext`` in
        Playwright's API). Callers expecting a ``Browser`` in every case
        should confirm they can handle that.

        Raises:
            RuntimeError: a freshly started Chrome instance could not be
                reached over CDP.
            Exception: any error from the persistent-context launch is logged
                and re-raised unchanged.
        """
        if self.config.wss_url:
            browser = await playwright.chromium.connect(self.config.wss_url)
            return browser
        elif self.config.chrome_instance_path:
            import subprocess

            import requests

            try:
                # Check if browser is already running
                # NOTE(review): requests.get is a synchronous call made from
                # inside a coroutine.
                response = requests.get('http://localhost:9222/json/version', timeout=2)
                if response.status_code == 200:
                    logger.info('Reusing existing Chrome instance')
                    browser = await playwright.chromium.connect_over_cdp(
                        endpoint_url='http://localhost:9222',
                        timeout=20000,  # 20 second timeout for connection
                    )
                    return browser
            except requests.ConnectionError:
                # Only connection errors are treated as "nothing is listening";
                # any other exception from the probe propagates to the caller.
                logger.debug('No existing Chrome instance found, starting a new one')

            # Start a new Chrome instance
            # The process handle is not kept; output is discarded.
            subprocess.Popen(
                [
                    self.config.chrome_instance_path,
                    '--remote-debugging-port=9222',
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Attempt to connect again after starting a new instance
            try:
                browser = await playwright.chromium.connect_over_cdp(
                    endpoint_url='http://localhost:9222',
                    timeout=20000,  # 20 second timeout for connection
                )
                return browser
            except Exception as e:
                logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
                raise RuntimeError(
                    ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
                )
        else:
            try:
                # Extra flags that relax web security, added only on request.
                disable_security_args = []
                if self.config.disable_security:
                    disable_security_args = [
                        '--disable-web-security',
                        '--disable-site-isolation-trials',
                        '--disable-features=IsolateOrigins,site-per-process',
                    ]

                # Use launch_persistent_context instead of launch
                user_data_dir = os.path.join(os.getcwd(), "user_data") # Specify the path to the user data directory
                browser_context = await playwright.chromium.launch_persistent_context(
                    user_data_dir=user_data_dir,
                    headless=self.config.headless,
                    # Final argument order: fixed flags, then the security
                    # flags, then config.extra_chromium_args.
                    args=[
                        '--no-sandbox',
                        '--disable-blink-features=AutomationControlled',
                        '--disable-infobars',
                        '--disable-background-timer-throttling',
                        '--disable-popup-blocking',
                        '--disable-backgrounding-occluded-windows',
                        '--disable-renderer-backgrounding',
                        '--disable-window-activation',
                        '--disable-focus-on-load',
                        '--no-first-run',
                        '--no-default-browser-check',
                        # NOTE(review): presumably a persistent-context launch
                        # waits for an initial page, which this flag would
                        # suppress - verify it is not behind the launch timeout.
                        '--no-startup-window',
                        '--window-position=0,0',
                        # f"--disable-extensions-except={extension_path}",
                        # f'--load-extension={extension_path}',  # Load the extension
                    ]
                    + disable_security_args
                    + self.config.extra_chromium_args,
                    proxy=self.config.proxy,
                )

                return browser_context
            except Exception as e:
                logger.error(f'Failed to initialize Playwright browser: {str(e)}')
                raise

# Browser settings: the extension flags are passed as extra Chromium
# arguments, which CustomBrowser._setup_browser appends to its own list.
config = BrowserConfig(
    extra_chromium_args=[
        f"--disable-extensions-except={extension_path}",
        f"--load-extension={extension_path}",
        "--disable-web-security",  # Optional, for testing purposes
        "--disable-site-isolation-trials"
    ]
)
# Created at import time and shared with main() as a module-level global.
browser = CustomBrowser(config=config)

async def main():
    """Run one agent task in the module-level ``browser`` and print the outcome."""
    task_prompt = "Go to Reddit, search for 'browser-use' in the search bar, click on the first post and return the first comment."
    language_model = ChatOpenAI(model="gpt-4o")
    reddit_agent = Agent(task=task_prompt, llm=language_model, browser=browser)
    # agent.run() drives the browser until the task ends; print what it returns.
    print(await reddit_agent.run())

# Script entry point: runs main() on a new event loop. There is no
# __main__ guard, so this also executes when the module is imported.
asyncio.run(main())

Error raised after it has been stuck for a while:

INFO     [browser_use] BrowserUse logging setup complete with level info
INFO     [root] Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information.
INFO     [agent] 🚀 Starting task: Go to google flight and book a flight from New York to Los Angeles
INFO     [agent] 
📍 Step 1
ERROR    [browser] Failed to initialize Playwright browser: BrowserType.launch_persistent_context: Timeout 180000ms exceeded.
Call log:
  - <launching> /home/benyamin/.cache/ms-playwright/chromium-1148/chrome-linux/chrome --disable-field-trial-config --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-back-forward-cache --disable-breakpad --disable-client-side-phishing-detection --disable-component-extensions-with-background-pages --disable-component-update --no-default-browser-check --disable-default-apps --disable-dev-shm-usage --disable-extensions --disable-features=ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,DialMediaRouteProvider,AcceptCHFrame,AutoExpandDetailsElement,CertificateTransparencyComponentUpdater,AvoidUnnecessaryBeforeUnloadCheckSync,Translate,HttpsUpgrades,PaintHolding,ThirdPartyStoragePartitioning,LensOverlay,PlzDedicatedWorker --allow-pre-commit-input --disable-hang-monitor --disable-ipc-flooding-protection --disable-popup-blocking --disable-prompt-on-repost --disable-renderer-backgrounding --force-color-profile=srgb --metrics-recording-only --no-first-run --enable-automation --password-store=basic --use-mock-keychain --no-service-autorun --export-tagged-pdf --disable-search-engine-choice-screen --unsafely-disable-devtools-self-xss-warnings --no-sandbox --no-sandbox --disable-blink-features=AutomationControlled --disable-infobars --disable-background-timer-throttling --disable-popup-blocking --disable-backgrounding-occluded-windows --disable-renderer-backgrounding --disable-window-activation --disable-focus-on-load --no-first-run --no-default-browser-check --no-startup-window --window-position=0,0 --disable-web-security --disable-site-isolation-trials --disable-features=IsolateOrigins,site-per-process --disable-extensions-except=/home/benyamin/PycharmProjects/stack/capsolver-extension --load-extension=/home/benyamin/PycharmProjects/stack/capsolver-extension --disable-web-security --disable-site-isolation-trials 
--user-data-dir=/home/benyamin/PycharmProjects/stack/user_data --remote-debugging-pipe about:blank
  -   - <launched> pid=683538
  -   - [pid=683538][err] [683538:683538:0117/224944.131425:ERROR:service_worker_task_queue.cc(196)] DidStartWorkerFail nbdgbpgkphcgkjiadleadooiojilllaj: 5
  -   - [pid=683538][err] [683538:683538:0117/224944.167807:ERROR:service_worker_task_queue.cc(196)] DidStartWorkerFail nbdgbpgkphcgkjiadleadooiojilllaj: 5
  -   - [pid=683538][err] [683538:683549:0117/224947.134480:ERROR:nss_util.cc(345)] After loading Root Certs, loaded==false: NSS error code: -8018
  -   - [pid=683538][err] [685058:685058:0117/225144.025929:ERROR:gpu_blocklist.cc(71)] Unable to get gpu adapter

WARNING  [browser] Page load failed, continuing...

Solution

  • Finally, I figured out the issue. The error occurs because launch_persistent_context returns a BrowserContext directly, while the library (Agent) expects a Browser instance.

    • Therefore, I created a CustomBrowserContext class that can work with both regular and persistent contexts.
    • Modified CustomBrowser to store and manage the persistent context.
    • Added a BrowserWrapper class to provide a compatible interface between the persistent context and the expected Browser interface.
    • Updated the context initialization to properly handle the persistent context case.
    # Custom Browser
    
    import asyncio
    import os
    import logging
    import subprocess
    import requests
    from dataclasses import dataclass, field
    from playwright._impl._api_structures import ProxySettings
    from playwright.async_api import Browser as PlaywrightBrowser, BrowserContext as PlaywrightBrowserContext
    from playwright.async_api import Playwright, async_playwright
    from browser_use.browser.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig
    
    # Module-level logger named after this module.
    logger = logging.getLogger(__name__)
    
    class CustomBrowserContext(BrowserContext):
        """BrowserContext that can sit on top of an already-launched persistent context.

        When a persistent Playwright context is supplied, ``session`` holds one
        of its pages; without one, initialisation is left to the base class.
        """

        def __init__(
            self,
            config: BrowserContextConfig,
            browser: 'CustomBrowser',
            persistent_context: PlaywrightBrowserContext = None
        ):
            super().__init__(config, browser)
            self._persistent_context = persistent_context
            if not persistent_context:
                return
            # Adopt the first page that is already open, if there is one.
            if persistent_context.pages:
                self.session = persistent_context.pages[0]
            else:
                self.session = None

        async def _init(self):
            """Return the session, opening a page in the persistent context if needed."""
            if not self._persistent_context:
                # No persistent context: use the base class initialisation.
                return await super()._init()
            if not self.session:
                self.session = await self._persistent_context.new_page()
            return self.session

        async def close(self):
            """Close the session page, if any, and forget it."""
            if not self.session:
                return
            await self.session.close()
            self.session = None
    
    class CustomBrowser(Browser):
        """Browser that launches Chromium with a persistent context.

        The default launch path uses ``launch_persistent_context`` so that
        extensions passed through ``config.extra_chromium_args`` can be loaded.
        That call hands back a Playwright ``BrowserContext`` rather than a
        ``Browser``, so the context is stored on the instance and returned
        inside a small wrapper that offers ``new_context()`` and ``close()``.
        """

        def __init__(self, config: BrowserConfig = None):
            """Create the browser.

            Args:
                config: Browser settings. When omitted, a fresh
                    ``BrowserConfig`` is built for this instance.
            """
            # A ``BrowserConfig()`` default in the signature would be evaluated
            # once at definition time and shared by every instance.
            super().__init__(BrowserConfig() if config is None else config)
            self._persistent_context = None

        async def new_context(self, config: BrowserContextConfig = None) -> CustomBrowserContext:
            """Create a browser context bound to the persistent context, if any.

            Args:
                config: Context settings. When omitted, a fresh
                    ``BrowserContextConfig`` is built for this call.
            """
            if config is None:
                config = BrowserContextConfig()
            return CustomBrowserContext(config=config, browser=self, persistent_context=self._persistent_context)

        async def _setup_browser(self, playwright: Playwright):
            """Connect to, or launch, Chromium according to ``self.config``.

            Paths, checked in order:

            1. ``config.wss_url`` set: connect to that remote endpoint.
            2. ``config.chrome_instance_path`` set: reuse a Chrome already
               listening on ``localhost:9222``, otherwise start that binary
               with remote debugging enabled and connect over CDP.
            3. Otherwise: launch Chromium with a persistent context stored in
               ``./user_data`` and return it inside a wrapper.

            Returns:
                The object from ``connect`` / ``connect_over_cdp`` for paths 1
                and 2, or a wrapper around the persistent context for path 3.

            Raises:
                RuntimeError: a freshly started Chrome instance could not be
                    reached over CDP (the original error is chained).
                Exception: any error from the persistent-context launch is
                    logged and re-raised unchanged.
            """
            if self.config.wss_url:
                return await playwright.chromium.connect(self.config.wss_url)

            if self.config.chrome_instance_path:
                cdp_url = 'http://localhost:9222'

                # Check if browser is already running
                try:
                    # requests is synchronous: run the probe in a worker thread
                    # so the event loop is not blocked for up to two seconds.
                    response = await asyncio.to_thread(
                        requests.get, f'{cdp_url}/json/version', timeout=2
                    )
                except requests.RequestException:
                    # Covers refused connections and timeouts alike; a read
                    # timeout is not a ConnectionError and must not crash here.
                    logger.debug('No existing Chrome instance found, starting a new one')
                else:
                    if response.status_code == 200:
                        logger.info('Reusing existing Chrome instance')
                        return await playwright.chromium.connect_over_cdp(
                            endpoint_url=cdp_url,
                            timeout=20000,  # 20 second timeout for connection
                        )

                # Start a new Chrome instance; the handle is deliberately not
                # kept and the process output is discarded.
                subprocess.Popen(
                    [
                        self.config.chrome_instance_path,
                        '--remote-debugging-port=9222',
                    ],
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.DEVNULL,
                )

                # Attempt to connect again after starting a new instance
                try:
                    return await playwright.chromium.connect_over_cdp(
                        endpoint_url=cdp_url,
                        timeout=20000,  # 20 second timeout for connection
                    )
                except Exception as e:
                    logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
                    # Chain the cause so the underlying failure stays visible.
                    raise RuntimeError(
                        ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
                    ) from e

            try:
                # Extra flags that relax web security, added only on request.
                disable_security_args = []
                if self.config.disable_security:
                    disable_security_args = [
                        '--disable-web-security',
                        '--disable-site-isolation-trials',
                        '--disable-features=IsolateOrigins,site-per-process',
                    ]

                user_data_dir = os.path.join(os.getcwd(), "user_data")
                logger.info(f'Using user data directory: {user_data_dir}')

                self._persistent_context = await playwright.chromium.launch_persistent_context(
                    user_data_dir=user_data_dir,
                    headless=self.config.headless,
                    # Order: fixed flags, security flags, then user-supplied
                    # flags from config.extra_chromium_args.
                    args=[
                        '--no-sandbox',
                        '--disable-blink-features=AutomationControlled',
                        '--disable-infobars',
                        '--disable-background-timer-throttling',
                        '--disable-popup-blocking',
                        '--disable-backgrounding-occluded-windows',
                        '--disable-renderer-backgrounding',
                        '--disable-window-activation',
                        '--disable-focus-on-load',
                        '--no-first-run',
                        '--no-default-browser-check',
                        '--window-position=0,0',
                    ] + disable_security_args + self.config.extra_chromium_args,
                    proxy=self.config.proxy,
                )

                # Create a wrapper object that mimics the Browser interface
                class BrowserWrapper:
                    def __init__(self, context):
                        self.context = context

                    async def new_context(self, **kwargs):
                        # There is only one persistent context; any requested
                        # options are ignored and that context is handed back.
                        return self.context

                    async def close(self):
                        await self.context.close()

                return BrowserWrapper(self._persistent_context)

            except Exception as e:
                logger.error(f'Failed to initialize Playwright browser: {str(e)}')
                raise

        async def close(self):
            """Close the persistent context and then the base browser.

            Errors are logged rather than raised. The two steps are guarded
            separately so that a failure while closing the persistent context
            does not skip the base class cleanup.
            """
            try:
                if self._persistent_context:
                    await self._persistent_context.close()
            except Exception as e:
                logger.error(f'Failed to close browser properly: {e}')
            finally:
                self._persistent_context = None

            try:
                await super().close()
            except Exception as e:
                logger.error(f'Failed to close browser properly: {e}')
    
    # Usage
    
    import asyncio
    import os
    from browser_use import Agent
    from browser_use.browser.browser import BrowserConfig
    from langchain_openai import ChatOpenAI
    
    # Directory of the unpacked extension to load; replace with a real path.
    extension_path = "/path/to/capsolver-extension"
    
    # Browser configuration handed to CustomBrowser. The extension is loaded
    # through extra Chromium flags, which _setup_browser appends to its own list.
    config = BrowserConfig(
        headless=False,  # Set to True if you want to run in headless mode
        # disable_security=True already makes CustomBrowser._setup_browser add
        # --disable-web-security and --disable-site-isolation-trials, so the
        # last two entries below repeat flags that are passed anyway.
        disable_security=True,
        extra_chromium_args=[
            f"--disable-extensions-except={extension_path}",
            f"--load-extension={extension_path}",
            "--disable-web-security",
            "--disable-site-isolation-trials"
        ]
    )
    
    async def main():
        """Run the agent task in a CustomBrowser and print its result.

        The browser is created here and handed to the agent, so it is also
        closed here once the run has finished or failed.
        """
        # Initialize the custom browser
        browser = CustomBrowser(config=config)

        try:
            # Create the agent
            agent = Agent(
                task="Go to google flight and book a flight from New York to Los Angeles",
                llm=ChatOpenAI(model="gpt-4o"),
                browser=browser,
            )

            print("Starting agent execution...")
            result = await agent.run()
            print("Execution completed!")
            print(result)
        finally:
            # Shut Chromium down even when the run raises, so the browser
            # process and its persistent context are not left open.
            await browser.close()
    
    # Start the agent only when this file is executed as a script, not on import.
    if __name__ == "__main__":
        asyncio.run(main())