I'm using browser-use for web automation. This package uses Playwright under the hood. I realized it is not possible to load an extension in incognito mode, so I must use `playwright.chromium.launch_persistent_context` instead of `playwright.chromium.launch`. But browser-use uses `playwright.chromium.launch`, so I wanted to override the `Browser` class to change this and load my extension there. However, with the following code that I have written so far, it gets stuck and the Chromium instance does not start the way it does in normal mode:
import asyncio
import os
from browser_use import Agent, BrowserConfig, Browser
from browser_use.browser.browser import logger
from langchain_openai import ChatOpenAI
from playwright.async_api import async_playwright, Playwright
# Placeholder path to the extension; it is interpolated into the
# --disable-extensions-except / --load-extension Chromium flags below.
extension_path = "/path/to/capsolver-extension"
class CustomBrowser(Browser):
    """First attempt: swap ``launch`` for ``launch_persistent_context``.

    NOTE: this is the version that hangs (see the timeout in the error log
    further down); it is kept unchanged as the reproduction of the problem.
    """

    async def _setup_browser(self, playwright: Playwright):
        """Set up the Playwright browser, launching a persistent context by default.

        Returns a Playwright browser for the ``wss_url`` and
        ``chrome_instance_path`` branches, but the object returned by
        ``launch_persistent_context`` (a browser context, not a browser) in
        the default branch.

        Raises:
            RuntimeError: if connecting to a freshly started Chrome instance
                over CDP fails.
        """
        if self.config.wss_url:
            browser = await playwright.chromium.connect(self.config.wss_url)
            return browser
        elif self.config.chrome_instance_path:
            import subprocess
            import requests
            try:
                # Check if browser is already running
                response = requests.get('http://localhost:9222/json/version', timeout=2)
                if response.status_code == 200:
                    logger.info('Reusing existing Chrome instance')
                    browser = await playwright.chromium.connect_over_cdp(
                        endpoint_url='http://localhost:9222',
                        timeout=20000, # 20 second timeout for connection
                    )
                    return browser
            except requests.ConnectionError:
                logger.debug('No existing Chrome instance found, starting a new one')
            # Start a new Chrome instance
            subprocess.Popen(
                [
                    self.config.chrome_instance_path,
                    '--remote-debugging-port=9222',
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            # Attempt to connect again after starting a new instance
            try:
                browser = await playwright.chromium.connect_over_cdp(
                    endpoint_url='http://localhost:9222',
                    timeout=20000, # 20 second timeout for connection
                )
                return browser
            except Exception as e:
                logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
                raise RuntimeError(
                    ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
                )
        else:
            try:
                disable_security_args = []
                if self.config.disable_security:
                    disable_security_args = [
                        '--disable-web-security',
                        '--disable-site-isolation-trials',
                        '--disable-features=IsolateOrigins,site-per-process',
                    ]
                # Use launch_persistent_context instead of launch
                user_data_dir = os.path.join(os.getcwd(), "user_data") # Specify the path to the user data directory
                browser_context = await playwright.chromium.launch_persistent_context(
                    user_data_dir=user_data_dir,
                    headless=self.config.headless,
                    args=[
                        '--no-sandbox',
                        '--disable-blink-features=AutomationControlled',
                        '--disable-infobars',
                        '--disable-background-timer-throttling',
                        '--disable-popup-blocking',
                        '--disable-backgrounding-occluded-windows',
                        '--disable-renderer-backgrounding',
                        '--disable-window-activation',
                        '--disable-focus-on-load',
                        '--no-first-run',
                        '--no-default-browser-check',
                        # NOTE(review): the working version later in this file
                        # omits this flag; presumably it keeps the initial page
                        # from opening and is why the launch times out - confirm.
                        '--no-startup-window',
                        '--window-position=0,0',
                        # f"--disable-extensions-except={extension_path}",
                        # f'--load-extension={extension_path}', # Load the extension
                    ]
                    + disable_security_args
                    + self.config.extra_chromium_args,
                    proxy=self.config.proxy,
                )
                # NOTE: unlike the other branches this returns a browser
                # context rather than a browser, which the follow-up text in
                # this file identifies as incompatible with what Agent expects.
                return browser_context
            except Exception as e:
                logger.error(f'Failed to initialize Playwright browser: {str(e)}')
                raise
# Chromium flags forwarded through BrowserConfig: the first two load the
# extension, the last two relax web security.
_extension_args = [
    f"--disable-extensions-except={extension_path}",
    f"--load-extension={extension_path}",
    "--disable-web-security",  # Optional, for testing purposes
    "--disable-site-isolation-trials",
]
config = BrowserConfig(extra_chromium_args=_extension_args)

# Module-level browser shared with main() below.
browser = CustomBrowser(config=config)
async def main():
    """Run the Reddit task on the module-level ``browser`` and print the result."""
    reddit_task = "Go to Reddit, search for 'browser-use' in the search bar, click on the first post and return the first comment."
    llm = ChatOpenAI(model="gpt-4o")
    agent = Agent(task=reddit_task, llm=llm, browser=browser)
    outcome = await agent.run()
    print(outcome)
# Guarded so that importing this module does not start the agent.
if __name__ == "__main__":
    asyncio.run(main())
This is the error that is raised after it has been stuck for a while:
INFO [browser_use] BrowserUse logging setup complete with level info
INFO [root] Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information.
INFO [agent] 🚀 Starting task: Go to google flight and book a flight from New York to Los Angeles
INFO [agent]
📍 Step 1
ERROR [browser] Failed to initialize Playwright browser: BrowserType.launch_persistent_context: Timeout 180000ms exceeded.
Call log:
- <launching> /home/benyamin/.cache/ms-playwright/chromium-1148/chrome-linux/chrome --disable-field-trial-config --disable-background-networking --disable-background-timer-throttling --disable-backgrounding-occluded-windows --disable-back-forward-cache --disable-breakpad --disable-client-side-phishing-detection --disable-component-extensions-with-background-pages --disable-component-update --no-default-browser-check --disable-default-apps --disable-dev-shm-usage --disable-extensions --disable-features=ImprovedCookieControls,LazyFrameLoading,GlobalMediaControls,DestroyProfileOnBrowserClose,MediaRouter,DialMediaRouteProvider,AcceptCHFrame,AutoExpandDetailsElement,CertificateTransparencyComponentUpdater,AvoidUnnecessaryBeforeUnloadCheckSync,Translate,HttpsUpgrades,PaintHolding,ThirdPartyStoragePartitioning,LensOverlay,PlzDedicatedWorker --allow-pre-commit-input --disable-hang-monitor --disable-ipc-flooding-protection --disable-popup-blocking --disable-prompt-on-repost --disable-renderer-backgrounding --force-color-profile=srgb --metrics-recording-only --no-first-run --enable-automation --password-store=basic --use-mock-keychain --no-service-autorun --export-tagged-pdf --disable-search-engine-choice-screen --unsafely-disable-devtools-self-xss-warnings --no-sandbox --no-sandbox --disable-blink-features=AutomationControlled --disable-infobars --disable-background-timer-throttling --disable-popup-blocking --disable-backgrounding-occluded-windows --disable-renderer-backgrounding --disable-window-activation --disable-focus-on-load --no-first-run --no-default-browser-check --no-startup-window --window-position=0,0 --disable-web-security --disable-site-isolation-trials --disable-features=IsolateOrigins,site-per-process --disable-extensions-except=/home/benyamin/PycharmProjects/stack/capsolver-extension --load-extension=/home/benyamin/PycharmProjects/stack/capsolver-extension --disable-web-security --disable-site-isolation-trials 
--user-data-dir=/home/benyamin/PycharmProjects/stack/user_data --remote-debugging-pipe about:blank
- - <launched> pid=683538
- - [pid=683538][err] [683538:683538:0117/224944.131425:ERROR:service_worker_task_queue.cc(196)] DidStartWorkerFail nbdgbpgkphcgkjiadleadooiojilllaj: 5
- - [pid=683538][err] [683538:683538:0117/224944.167807:ERROR:service_worker_task_queue.cc(196)] DidStartWorkerFail nbdgbpgkphcgkjiadleadooiojilllaj: 5
- - [pid=683538][err] [683538:683549:0117/224947.134480:ERROR:nss_util.cc(345)] After loading Root Certs, loaded==false: NSS error code: -8018
- - [pid=683538][err] [685058:685058:0117/225144.025929:ERROR:gpu_blocklist.cc(71)] Unable to get gpu adapter
WARNING [browser] Page load failed, continuing...
Finally, I figured out the issue. The error occurs because `launch_persistent_context` returns a `BrowserContext` directly, while the library (`Agent`) expects a `Browser` instance. The solution has three parts:

- A `CustomBrowserContext` class that can work with both regular and persistent contexts.
- A `CustomBrowser` that stores and manages the persistent context.
- A `BrowserWrapper` class that provides a compatible interface between the persistent context and the expected `Browser` interface.

# Custom Browser
import asyncio
import logging
import os
import subprocess
from dataclasses import dataclass, field
from typing import Optional

import requests
from browser_use.browser.browser import Browser, BrowserConfig, BrowserContext, BrowserContextConfig
from playwright._impl._api_structures import ProxySettings
from playwright.async_api import Browser as PlaywrightBrowser, BrowserContext as PlaywrightBrowserContext
from playwright.async_api import Playwright, async_playwright
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class CustomBrowserContext(BrowserContext):
    """BrowserContext that can sit on top of a Playwright persistent context.

    When ``persistent_context`` is given, the session is taken from (or
    created in) that context. When it is not, both ``_init`` and ``close``
    defer to the base class.
    """

    def __init__(
        self,
        config: BrowserContextConfig,
        browser: 'CustomBrowser',
        persistent_context: Optional[PlaywrightBrowserContext] = None,
    ):
        """Store the optional persistent context and adopt its first page.

        Args:
            config: Context configuration forwarded to the base class.
            browser: The owning ``CustomBrowser``.
            persistent_context: Context returned by
                ``launch_persistent_context``, or ``None`` for regular mode.
        """
        super().__init__(config, browser)
        self._persistent_context = persistent_context
        if persistent_context:
            # Reuse the page the persistent launch opened, if there is one;
            # otherwise _init() creates a page lazily.
            self.session = persistent_context.pages[0] if persistent_context.pages else None

    async def _init(self):
        """Initialize and return the session for this context."""
        if not self._persistent_context:
            return await super()._init()
        if not self.session:
            self.session = await self._persistent_context.new_page()
        return self.session

    async def close(self):
        """Close the session of this context.

        In persistent mode only the page is closed here; the persistent
        context itself is stored on, and closed by, the owning browser.
        Without a persistent context the base-class ``close`` is used,
        mirroring what ``_init`` does.
        """
        if not self._persistent_context:
            # NOTE(review): assumes the base BrowserContext.close() tears down
            # whatever super()._init() created - confirm against browser_use.
            await super().close()
            return
        if self.session:
            await self.session.close()
            self.session = None
class CustomBrowser(Browser):
    """Browser that launches Chromium through a persistent context.

    ``launch_persistent_context`` returns a browser context instead of a
    browser, so the launched context is kept on ``self._persistent_context``
    and handed to callers wrapped in ``BrowserWrapper``.
    """

    # Local DevTools endpoint used when attaching to a user-supplied Chrome.
    _CDP_ENDPOINT = 'http://localhost:9222'

    class BrowserWrapper:
        """Minimal browser-like facade over a single persistent context."""

        def __init__(self, context):
            self.context = context

        async def new_context(self, **kwargs):
            # A persistent launch has exactly one context, so the requested
            # options are ignored and that context is returned.
            return self.context

        async def close(self):
            await self.context.close()

    def __init__(self, config: Optional[BrowserConfig] = None, user_data_dir: Optional[str] = None):
        """Create the browser.

        Args:
            config: Browser configuration; a fresh ``BrowserConfig()`` is
                created per instance when omitted (a default built in the
                signature would be shared by every instance).
            user_data_dir: Profile directory for the persistent context.
                Defaults to ``<cwd>/user_data``.
        """
        super().__init__(config if config is not None else BrowserConfig())
        self._persistent_context = None
        self._user_data_dir = user_data_dir

    async def new_context(self, config: Optional[BrowserContextConfig] = None) -> CustomBrowserContext:
        """Create a browser context bound to the persistent context, if any."""
        if config is None:
            config = BrowserContextConfig()
        return CustomBrowserContext(config=config, browser=self, persistent_context=self._persistent_context)

    async def _setup_browser(self, playwright: Playwright):
        """Return a Playwright browser, or a ``BrowserWrapper`` around a persistent context.

        Order of precedence: ``wss_url`` (remote connect), then
        ``chrome_instance_path`` (attach over CDP), else a persistent launch.
        """
        if self.config.wss_url:
            return await playwright.chromium.connect(self.config.wss_url)
        if self.config.chrome_instance_path:
            return await self._connect_to_chrome_instance(playwright)
        return await self._launch_persistent_browser(playwright)

    async def _connect_to_chrome_instance(self, playwright: Playwright):
        """Attach over CDP to a running Chrome, starting one first if needed.

        Raises:
            RuntimeError: if the connection to the newly started instance fails.
        """
        try:
            # Check if browser is already running
            response = requests.get(f'{self._CDP_ENDPOINT}/json/version', timeout=2)
            if response.status_code == 200:
                logger.info('Reusing existing Chrome instance')
                return await playwright.chromium.connect_over_cdp(
                    endpoint_url=self._CDP_ENDPOINT,
                    timeout=20000,  # 20 second timeout for connection
                )
        except requests.ConnectionError:
            logger.debug('No existing Chrome instance found, starting a new one')

        # Start a new Chrome instance
        subprocess.Popen(
            [
                self.config.chrome_instance_path,
                '--remote-debugging-port=9222',
            ],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )

        # Attempt to connect again after starting a new instance
        try:
            return await playwright.chromium.connect_over_cdp(
                endpoint_url=self._CDP_ENDPOINT,
                timeout=20000,  # 20 second timeout for connection
            )
        except Exception as e:
            logger.error(f'Failed to start a new Chrome instance.: {str(e)}')
            # Chain the cause so the underlying connection error stays visible.
            raise RuntimeError(
                ' To start chrome in Debug mode, you need to close all existing Chrome instances and try again otherwise we can not connect to the instance.'
            ) from e

    async def _launch_persistent_browser(self, playwright: Playwright):
        """Launch Chromium with a persistent profile and wrap the resulting context."""
        try:
            disable_security_args = []
            if self.config.disable_security:
                disable_security_args = [
                    '--disable-web-security',
                    '--disable-site-isolation-trials',
                    '--disable-features=IsolateOrigins,site-per-process',
                ]
            user_data_dir = self._user_data_dir or os.path.join(os.getcwd(), "user_data")
            logger.info(f'Using user data directory: {user_data_dir}')
            self._persistent_context = await playwright.chromium.launch_persistent_context(
                user_data_dir=user_data_dir,
                headless=self.config.headless,
                args=[
                    '--no-sandbox',
                    '--disable-blink-features=AutomationControlled',
                    '--disable-infobars',
                    '--disable-background-timer-throttling',
                    '--disable-popup-blocking',
                    '--disable-backgrounding-occluded-windows',
                    '--disable-renderer-backgrounding',
                    '--disable-window-activation',
                    '--disable-focus-on-load',
                    '--no-first-run',
                    '--no-default-browser-check',
                    '--window-position=0,0',
                ] + disable_security_args + self.config.extra_chromium_args,
                proxy=self.config.proxy,
            )
            # Create a wrapper object that mimics the Browser interface
            return self.BrowserWrapper(self._persistent_context)
        except Exception as e:
            logger.error(f'Failed to initialize Playwright browser: {str(e)}')
            raise

    async def close(self):
        """Close the persistent context (if any) and then the base browser.

        The two steps are attempted independently so that a failure closing
        the context does not skip the base-class shutdown. Errors are logged,
        not raised.
        """
        try:
            if self._persistent_context:
                await self._persistent_context.close()
        except Exception as e:
            logger.error(f'Failed to close browser properly: {e}')
        finally:
            self._persistent_context = None
        try:
            await super().close()
        except Exception as e:
            logger.error(f'Failed to close browser properly: {e}')
# Usage
import asyncio
import os
from browser_use import Agent
from browser_use.browser.browser import BrowserConfig
from langchain_openai import ChatOpenAI
# Define your extension path
extension_path = "/path/to/capsolver-extension"

# Create browser configuration
config = BrowserConfig(
    headless=False,  # Set to True if you want to run in headless mode
    # With disable_security=True, CustomBrowser._setup_browser already adds
    # --disable-web-security and --disable-site-isolation-trials, so they are
    # not repeated in extra_chromium_args.
    disable_security=True,
    extra_chromium_args=[
        f"--disable-extensions-except={extension_path}",
        f"--load-extension={extension_path}",
    ],
)
async def main():
    """Run the flight-booking task in a CustomBrowser and print the result.

    The browser is created here, so it is also closed here, even when the
    agent run raises.
    """
    # Initialize the custom browser
    browser = CustomBrowser(config=config)
    try:
        # Create the agent
        agent = Agent(
            task="Go to google flight and book a flight from New York to Los Angeles",
            llm=ChatOpenAI(model="gpt-4o"),
            browser=browser,
        )
        print("Starting agent execution...")
        result = await agent.run()
        print("Execution completed!")
        print(result)
    finally:
        # NOTE(review): presumably the Agent leaves a browser it was handed
        # open; closing it here keeps Chromium from staying alive - confirm.
        await browser.close()
# Run main() only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    asyncio.run(main())