Skip to content

Commit 0941a58

Browse files
authored
Browserbase updates
1 parent 629a53d commit 0941a58

File tree

1 file changed

+102
-6
lines changed

1 file changed

+102
-6
lines changed

computers/browserbase.py

Lines changed: 102 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,21 @@
11
import os
22
from typing import Tuple, Dict, List, Union, Optional
3-
from playwright.sync_api import Browser, Page
3+
from playwright.sync_api import Browser, Page, BrowserContext, Error as PlaywrightError
44
from .base_playwright import BasePlaywrightComputer
55
from browserbase import Browserbase
66
from dotenv import load_dotenv
7+
import base64
78

89
load_dotenv()
910

1011

1112
class BrowserbaseBrowser(BasePlaywrightComputer):
1213
"""
1314
Browserbase is a headless browser platform that offers a remote browser API. You can use it to control thousands of browsers from anywhere.
14-
You can find more information about Browserbase at https://docs.browserbase.com/ or view our OpenAI CUA Quickstart at https://docs.browserbase.com/integrations/openai-cua/introduction.
15+
You can find more information about Browserbase at https://www.browserbase.com/computer-use or view our OpenAI CUA Quickstart at https://docs.browserbase.com/integrations/openai-cua/introduction.
16+
17+
IMPORTANT: This Browserbase computer requires the use of the `goto` tool defined in playwright_with_custom_functions.py.
18+
Make sure to include this tool in your configuration when using the Browserbase computer.
1519
"""
1620

1721
def __init__(
@@ -20,6 +24,8 @@ def __init__(
2024
height: int = 768,
2125
region: str = "us-west-2",
2226
proxy: bool = False,
27+
virtual_mouse: bool = True,
28+
ad_blocker: bool = False,
2329
):
2430
"""
2531
Initialize the Browserbase instance. Additional configuration options for features such as persistent cookies, ad blockers, file downloads and more can be found in the Browserbase API documentation: https://docs.browserbase.com/reference/api/create-a-session
@@ -29,6 +35,8 @@ def __init__(
2935
height (int): The height of the browser viewport. Default is 768.
3036
region (str): The region for the Browserbase session. Default is "us-west-2". Pick a region close to you for better performance. https://docs.browserbase.com/guides/multi-region
3137
proxy (bool): Whether to use a proxy for the session. Default is False. Turn on proxies if your browsing is frequently interrupted. https://docs.browserbase.com/features/proxies
38+
virtual_mouse (bool): Whether to enable the virtual mouse cursor. Default is True.
39+
ad_blocker (bool): Whether to enable the built-in ad blocker. Default is False.
3240
"""
3341
super().__init__()
3442
self.bb = Browserbase(api_key=os.getenv("BROWSERBASE_API_KEY"))
@@ -37,6 +45,8 @@ def __init__(
3745
self.dimensions = (width, height)
3846
self.region = region
3947
self.proxy = proxy
48+
self.virtual_mouse = virtual_mouse
49+
self.ad_blocker = ad_blocker
4050

4151
def _get_browser_and_page(self) -> Tuple[Browser, Page]:
4252
"""
@@ -49,7 +59,10 @@ def _get_browser_and_page(self) -> Tuple[Browser, Page]:
4959
width, height = self.dimensions
5060
session_params = {
5161
"project_id": self.project_id,
52-
"browser_settings": {"viewport": {"width": width, "height": height}},
62+
"browser_settings": {
63+
"viewport": {"width": width, "height": height},
64+
"blockAds": self.ad_blocker,
65+
},
5366
"region": self.region,
5467
"proxies": self.proxy,
5568
}
@@ -61,13 +74,83 @@ def _get_browser_and_page(self) -> Tuple[Browser, Page]:
6174
)
6275

6376
# Connect to the remote session
64-
browser = self._playwright.chromium.connect_over_cdp(self.session.connect_url)
77+
browser = self._playwright.chromium.connect_over_cdp(
78+
self.session.connect_url,
79+
timeout=60000
80+
)
6581
context = browser.contexts[0]
82+
83+
# Add event listeners for page creation and closure
84+
context.on("page", self._handle_new_page)
85+
86+
# Only add the init script if virtual_mouse is True
87+
if self.virtual_mouse:
88+
context.add_init_script("""
89+
// Only run in the top frame
90+
if (window.self === window.top) {
91+
function initCursor() {
92+
const CURSOR_ID = '__cursor__';
93+
94+
// Check if cursor element already exists
95+
if (document.getElementById(CURSOR_ID)) return;
96+
97+
const cursor = document.createElement('div');
98+
cursor.id = CURSOR_ID;
99+
Object.assign(cursor.style, {
100+
position: 'fixed',
101+
top: '0px',
102+
left: '0px',
103+
width: '20px',
104+
height: '20px',
105+
backgroundImage: 'url("data:image/svg+xml;utf8,<svg xmlns=\\'http://www.w3.org/2000/svg\\' viewBox=\\'0 0 24 24\\' fill=\\'black\\' stroke=\\'white\\' stroke-width=\\'1\\' stroke-linejoin=\\'round\\' stroke-linecap=\\'round\\'><polygon points=\\'2,2 2,22 8,16 14,22 17,19 11,13 20,13\\'/></svg>")',
106+
backgroundSize: 'cover',
107+
pointerEvents: 'none',
108+
zIndex: '99999',
109+
transform: 'translate(-2px, -2px)',
110+
});
111+
112+
document.body.appendChild(cursor);
113+
114+
document.addEventListener("mousemove", (e) => {
115+
cursor.style.top = e.clientY + "px";
116+
cursor.style.left = e.clientX + "px";
117+
});
118+
}
119+
120+
// Use requestAnimationFrame for early execution
121+
requestAnimationFrame(function checkBody() {
122+
if (document.body) {
123+
initCursor();
124+
} else {
125+
requestAnimationFrame(checkBody);
126+
}
127+
});
128+
}
129+
""")
130+
66131
page = context.pages[0]
132+
page.on("close", self._handle_page_close)
133+
67134
page.goto("https://bing.com")
68135

69136
return browser, page
70137

138+
def _handle_new_page(self, page: Page):
    """Adopt a newly opened page as the active page.

    The new page replaces ``self._page`` and gets a "close" listener so
    that its closure is routed to ``_handle_page_close``.

    Args:
        page: The page that was just created.
    """
    print("New page created")
    # Subscribe to the page's close event, then make it the active page.
    page.on("close", self._handle_page_close)
    self._page = page
143+
144+
def _handle_page_close(self, page: Page):
    """React to a page being closed.

    If the closed page is the active one (``self._page``), switch to the
    last page of the browser's first context; when no pages are left, print
    a warning and set ``self._page`` to ``None``. Closing any other page
    leaves the active page untouched.

    Args:
        page: The page that was closed.
    """
    print("Page closed")
    if self._page != page:
        # Some other page closed; the active page is unaffected.
        return

    remaining = self._browser.contexts[0].pages
    if not remaining:
        print("Warning: All pages have been closed.")
        self._page = None
        return

    # Fall back to the last page in the context's page list.
    self._page = remaining[-1]
153+
71154
def __exit__(self, exc_type, exc_val, exc_tb):
72155
"""
73156
Clean up resources when exiting the context manager.
@@ -91,9 +174,22 @@ def __exit__(self, exc_type, exc_val, exc_tb):
91174

92175
def screenshot(self) -> str:
    """
    Capture a screenshot of the current viewport using CDP.

    A CDP session is opened on the active page for the capture and is
    detached again afterwards, so repeated screenshots do not leave
    sessions attached to the page. If the CDP path raises a Playwright
    error, the base class screenshot implementation is used instead.

    Returns:
        str: A base64 encoded string of the screenshot.
    """
    try:
        # Get CDP session from the page
        cdp_session = self._page.context.new_cdp_session(self._page)
        try:
            # Capture screenshot using CDP
            result = cdp_session.send(
                "Page.captureScreenshot",
                {
                    "format": "png",
                    "fromSurface": True,
                },
            )
        finally:
            # Release the session whether or not the capture succeeded.
            # Detaching is best-effort: a failure here (e.g. the target is
            # already gone) must not mask the capture result or the
            # original capture error.
            try:
                cdp_session.detach()
            except PlaywrightError:
                pass

        return result["data"]
    except PlaywrightError as error:
        print(f"CDP screenshot failed, falling back to standard screenshot: {error}")
        return super().screenshot()

0 commit comments

Comments
 (0)