Skip to content

Commit d11d09d

Browse files
committed
Added fixes for Devtools and Agent Server connection. Fixed Dockerfile
1 parent ffc45ec commit d11d09d

File tree

4 files changed

+255
-58
lines changed

4 files changed

+255
-58
lines changed

agent-server/nodejs/src/lib/BrowserAgentServer.js

Lines changed: 60 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -773,11 +773,66 @@ export class BrowserAgentServer extends EventEmitter {
773773
*/
774774
async getCDPBrowserEndpoint() {
775775
try {
776-
const cdpUrl = `http://${CONFIG.cdp.host}:${CONFIG.cdp.port}/json/version`;
777-
logger.info('Attempting to connect to CDP', { cdpUrl });
778-
const response = await fetch(cdpUrl);
779-
const data = await response.json();
780-
return data.webSocketDebuggerUrl;
776+
const path = '/json/version';
777+
logger.info('Attempting to connect to CDP', {
778+
host: CONFIG.cdp.host,
779+
port: CONFIG.cdp.port,
780+
path
781+
});
782+
783+
// When connecting via host.docker.internal, we need to set Host header to localhost
784+
// because Chrome only accepts CDP requests with localhost/127.0.0.1 in the Host header
785+
const headers = {};
786+
if (CONFIG.cdp.host === 'host.docker.internal') {
787+
headers['Host'] = `localhost:${CONFIG.cdp.port}`;
788+
logger.info('Using Host header override for host.docker.internal', headers);
789+
}
790+
791+
const options = {
792+
hostname: CONFIG.cdp.host,
793+
port: CONFIG.cdp.port,
794+
path: path,
795+
method: 'GET',
796+
headers: headers
797+
};
798+
799+
const http = await import('http');
800+
801+
return new Promise((resolve, reject) => {
802+
const req = http.default.request(options, (res) => {
803+
let data = '';
804+
805+
res.on('data', (chunk) => {
806+
data += chunk;
807+
});
808+
809+
res.on('end', () => {
810+
try {
811+
const jsonData = JSON.parse(data);
812+
let wsUrl = jsonData.webSocketDebuggerUrl;
813+
814+
// Replace localhost with host.docker.internal when running in Docker
815+
if (CONFIG.cdp.host === 'host.docker.internal' && wsUrl) {
816+
wsUrl = wsUrl.replace('ws://localhost:', 'ws://host.docker.internal:');
817+
wsUrl = wsUrl.replace('ws://127.0.0.1:', 'ws://host.docker.internal:');
818+
logger.info('Rewrote WebSocket URL for Docker', { original: jsonData.webSocketDebuggerUrl, rewritten: wsUrl });
819+
}
820+
821+
resolve(wsUrl);
822+
} catch (parseError) {
823+
logger.error('Failed to parse CDP response', { error: parseError.message, data });
824+
reject(new Error('Failed to connect to Chrome DevTools Protocol'));
825+
}
826+
});
827+
});
828+
829+
req.on('error', (error) => {
830+
logger.error('Failed to get CDP browser endpoint', { error: error.message });
831+
reject(new Error('Failed to connect to Chrome DevTools Protocol'));
832+
});
833+
834+
req.end();
835+
});
781836
} catch (error) {
782837
logger.error('Failed to get CDP browser endpoint', { error: error.message });
783838
throw new Error('Failed to connect to Chrome DevTools Protocol');

docker/Dockerfile

Lines changed: 73 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -47,25 +47,88 @@ RUN git remote add upstream https://github.com/BrowserOperator/browser-operator-
4747
RUN git fetch upstream
4848
RUN git checkout upstream/main
4949

50-
# Allow configurable automated mode
51-
ARG AUTOMATED_MODE=false
52-
53-
# Set build-time flags based on Docker arg
54-
RUN if [ "$AUTOMATED_MODE" = "true" ]; then \
55-
sed -i 's/AUTOMATED_MODE: false/AUTOMATED_MODE: true/' \
56-
front_end/panels/ai_chat/core/BuildConfig.ts; \
57-
fi
50+
# Force automated mode (matches parent Dockerfile.devtools approach - rock-solid)
51+
RUN sed -i 's/AUTOMATED_MODE: false/AUTOMATED_MODE: true/' \
52+
front_end/panels/ai_chat/core/BuildConfig.ts || true
5853

5954
# Build Browser Operator version with current changes
6055
RUN npm run build
6156

62-
# Production stage
57+
# ==============================================================================
58+
# Agent Server build stage
59+
# ==============================================================================
60+
FROM --platform=linux/amd64 node:18-alpine AS agent-server-builder
61+
62+
WORKDIR /workspace
63+
64+
# Copy agent-server from local directory
65+
COPY agent-server/nodejs /workspace/agent-server
66+
67+
WORKDIR /workspace/agent-server
68+
69+
# Install dependencies
70+
RUN npm install
71+
72+
# ==============================================================================
73+
# Production stage - DevTools + Agent Server
74+
# ==============================================================================
6375
FROM --platform=linux/amd64 nginx:alpine
6476

77+
# Install Node.js (required for agent-server)
78+
RUN apk add --no-cache nodejs npm bash
79+
6580
# Copy the built DevTools frontend
6681
COPY --from=builder /workspace/devtools/devtools-frontend/out/Default/gen/front_end /usr/share/nginx/html
6782

83+
# Copy agent-server from builder stage
84+
COPY --from=agent-server-builder /workspace/agent-server /opt/agent-server
85+
6886
# Copy nginx config
6987
COPY docker/nginx.conf /etc/nginx/conf.d/default.conf
7088

71-
EXPOSE 8000
89+
# Create startup script to run both nginx and agent-server
90+
RUN cat > /usr/local/bin/start-services.sh <<'EOFSCRIPT'
91+
#!/bin/bash
92+
set -e
93+
94+
# Start nginx in background
95+
echo "Starting nginx..."
96+
nginx -g "daemon off;" &
97+
NGINX_PID=$!
98+
99+
# Start agent-server in background
100+
echo "Starting agent-server..."
101+
cd /opt/agent-server
102+
node start.js &
103+
AGENT_PID=$!
104+
105+
# Announce where each service is reachable
106+
echo "DevTools running on http://localhost:8000"
107+
echo "Agent Server WebSocket on ws://localhost:8082"
108+
echo "Agent Server HTTP API on http://localhost:8080"
109+
echo "Press Ctrl+C to stop..."
110+
111+
# Trap SIGTERM and SIGINT
112+
trap "kill $NGINX_PID $AGENT_PID 2>/dev/null || true; exit 0" SIGTERM SIGINT
113+
114+
# Wait for either process to exit
115+
wait -n $NGINX_PID $AGENT_PID
116+
117+
# If one exits, kill the other
118+
kill $NGINX_PID $AGENT_PID 2>/dev/null || true
119+
EOFSCRIPT
120+
121+
RUN chmod +x /usr/local/bin/start-services.sh
122+
123+
# Environment variables for agent-server
124+
ENV PORT=8082
125+
ENV API_PORT=8080
126+
ENV HOST=0.0.0.0
127+
ENV CDP_HOST=host.docker.internal
128+
ENV CDP_PORT=9222
129+
130+
EXPOSE 8000 8080 8082
131+
132+
# Override the nginx entrypoint
133+
ENTRYPOINT []
134+
CMD ["/bin/bash", "/usr/local/bin/start-services.sh"]

docker/README.md

Lines changed: 109 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
# Docker Setup for Chrome DevTools Frontend
1+
# Docker Setup for Browser Operator DevTools + Agent Server
22

3-
This directory contains Docker configuration files for building and running the Chrome DevTools Frontend in a containerized environment.
3+
This directory contains Docker configuration files for building and running the Browser Operator DevTools Frontend with integrated Agent Server in a containerized environment.
44

55
## Overview
66

77
The Docker setup uses a multi-stage build process:
8-
1. **Build Stage**: Compiles the DevTools frontend using the full development environment
9-
2. **Production Stage**: Serves only the built files using Nginx (lightweight, ~50MB final image)
8+
1. **DevTools Build Stage**: Compiles the DevTools frontend using the full development environment
9+
2. **Agent Server Build Stage**: Installs Node.js dependencies for the agent server
10+
3. **Production Stage**: Serves DevTools via Nginx + runs Agent Server (Node.js) in the same container
1011

1112
## Prerequisites
1213

@@ -22,11 +23,8 @@ The Docker setup uses a multi-stage build process:
2223
From the repository root directory:
2324

2425
```bash
25-
# Build with automated mode (default - bypasses OAuth, auto-enables evaluation)
26-
docker build -f docker/Dockerfile -t browser-operator-automated .
27-
28-
# Build with normal mode (requires manual authentication)
29-
docker build -f docker/Dockerfile --build-arg AUTOMATED_MODE=false -t browser-operator-manual .
26+
# Build DevTools image (AUTOMATED_MODE is always enabled)
27+
docker build -f docker/Dockerfile -t browser-operator-devtools .
3028

3129
# Or use docker-compose (recommended)
3230
docker-compose -f docker/docker-compose.yml build
@@ -35,67 +33,143 @@ docker-compose -f docker/docker-compose.yml build
3533
### Running the Container
3634

3735
```bash
38-
# Automated mode (no authentication required, evaluation auto-enabled)
39-
docker run -d -p 8000:8000 --name browser-operator-automated browser-operator-automated
40-
41-
# Manual mode (requires OAuth/API key setup)
42-
docker run -d -p 8000:8000 --name browser-operator-manual browser-operator-manual
36+
# Run DevTools container (AUTOMATED_MODE enabled by default)
37+
docker run -d -p 8000:8000 -p 8080:8080 -p 8082:8082 --name browser-operator-devtools browser-operator-devtools
4338

4439
# Or using docker-compose (recommended)
4540
docker-compose -f docker/docker-compose.yml up -d
4641
```
4742

48-
The DevTools will be available at: http://localhost:8000
43+
The services will be available at:
44+
- **DevTools UI**: http://localhost:8000
45+
- **Agent Server HTTP API**: http://localhost:8080
46+
- **Agent Server WebSocket**: ws://localhost:8082
4947

5048
### Accessing DevTools
5149

52-
Once the container is running, open Chrome or Chromium with:
50+
Once the container is running, open Chrome or Chromium with remote debugging enabled:
5351

5452
```bash
5553
# macOS
56-
/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --custom-devtools-frontend=http://localhost:8000/
54+
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" \
55+
--remote-debugging-port=9222 \
56+
--remote-allow-origins="*" \
57+
--auto-open-devtools-for-tabs \
58+
--user-data-dir=/tmp/chrome-debug-profile \
59+
--custom-devtools-frontend=http://localhost:8000/
5760

5861
# Linux
59-
google-chrome --custom-devtools-frontend=http://localhost:8000/
62+
google-chrome \
63+
--remote-debugging-port=9222 \
64+
--remote-allow-origins="*" \
65+
--auto-open-devtools-for-tabs \
66+
--user-data-dir=/tmp/chrome-debug-profile \
67+
--custom-devtools-frontend=http://localhost:8000/
6068

6169
# Windows (Command Prompt; lines are continued with ^)
62-
chrome.exe --custom-devtools-frontend=http://localhost:8000/
70+
"C:\Program Files\Google\Chrome\Application\chrome.exe" ^
71+
--remote-debugging-port=9222 ^
72+
--remote-allow-origins="*" ^
73+
--auto-open-devtools-for-tabs ^
74+
--user-data-dir=C:\temp\chrome-debug-profile ^
75+
--custom-devtools-frontend=http://localhost:8000/
6376
```
6477

78+
**Important flags:**
79+
- `--remote-debugging-port=9222` - Enables CDP for the Agent Server to connect
80+
- `--remote-allow-origins="*"` - Allows CDP connections from Docker containers
81+
- `--auto-open-devtools-for-tabs` - Automatically opens DevTools for new tabs (required for agent-server automation)
82+
- `--user-data-dir=/tmp/chrome-debug-profile` - Uses a temporary profile to avoid conflicts
83+
- `--custom-devtools-frontend=http://localhost:8000/` - Uses the Browser Operator DevTools
84+
85+
**Note:** Make sure to completely quit Chrome before starting it with these flags. On macOS, use `Cmd+Q` or run `killall "Google Chrome"`.
86+
6587
## File Structure
6688

6789
```
6890
docker/
69-
├── Dockerfile # Multi-stage build configuration
91+
├── Dockerfile # Multi-stage build (DevTools + Agent Server)
7092
├── .dockerignore # Files to exclude from Docker context
7193
├── nginx.conf # Nginx server configuration
7294
├── docker-compose.yml # Docker Compose configuration
7395
└── README.md # This file
96+
97+
../agent-server/ # Agent Server source code (included in build)
7498
```
7599

76-
## Automated Mode vs Manual Mode
100+
## Automated Mode (Always Enabled)
77101

78-
### Automated Mode (Default)
79-
- **Purpose**: Optimized for Docker/CI environments and automated workflows
80-
- **Authentication**: Bypasses OAuth panel - no manual setup required
81-
- **Evaluation**: Automatically enables evaluation mode for API wrapper connectivity
82-
- **Use cases**: Production deployments, CI/CD, headless automation, API integration
102+
This Docker image is built with **AUTOMATED_MODE** always enabled for seamless deployment:
83103

84-
### Manual Mode
85-
- **Purpose**: Standard interactive usage
86-
- **Authentication**: Requires OAuth setup or API key configuration
87-
- **Evaluation**: Manual enable/disable in settings
88-
- **Use cases**: Development, interactive testing, manual usage
104+
- **Authentication**: Bypasses OAuth panel - no manual setup required
105+
- **Evaluation**: Automatically enables evaluation mode for agent-server connectivity
106+
- **Use cases**: Production deployments, CI/CD, headless automation, API integration
89107

90108
```bash
91-
# Example automated mode workflow
92-
docker build -f docker/Dockerfile -t browser-operator-automated .
93-
docker run -d -p 8000:8000 --name browser-operator browser-operator-automated
109+
# Example workflow - ready to use immediately
110+
docker build -f docker/Dockerfile -t browser-operator-devtools .
111+
docker run -d -p 8000:8000 -p 8080:8080 -p 8082:8082 --name browser-operator browser-operator-devtools
94112

95113
# Ready to use immediately - no authentication required!
96-
# Evaluation server can connect automatically via WebSocket (ws://localhost:8080)
114+
# Agent server can connect automatically via WebSocket (ws://localhost:8082)
97115
```
98116

117+
**Note**: This approach matches the parent repository's `Dockerfile.devtools`, which has proven reliable in production.
118+
119+
## Agent Server
120+
121+
The container includes a fully functional Agent Server that provides:
122+
123+
### WebSocket API (port 8082)
124+
- JSON-RPC 2.0 bidirectional communication
125+
- Browser agent lifecycle management
126+
- Direct CDP integration
127+
128+
### HTTP REST API (port 8080)
129+
- `POST /v1/responses` - Send tasks to browser agents
130+
- `POST /page/screenshot` - Capture screenshots via CDP
131+
- `POST /page/content` - Get HTML/text content
132+
- `GET /status` - Health check
133+
134+
### Configuration
135+
136+
The Agent Server runs with these default settings:
137+
- **WebSocket Port**: 8082
138+
- **HTTP API Port**: 8080
139+
- **Host**: 0.0.0.0 (listens on all interfaces)
140+
- **Authentication**: Disabled (automated mode)
141+
142+
To customize, you can override environment variables:
143+
144+
```bash
145+
docker run -d -p 8000:8000 -p 8080:8080 -p 8082:8082 \
146+
-e PORT=8082 \
147+
-e API_PORT=8080 \
148+
-e HOST=0.0.0.0 \
149+
browser-operator-devtools
150+
```
151+
152+
### Testing the Agent Server
153+
154+
```bash
155+
# Health check
156+
curl http://localhost:8080/status
157+
158+
# Send a task (requires browser with remote debugging)
159+
curl -X POST http://localhost:8080/v1/responses \
160+
-H "Content-Type: application/json" \
161+
-d '{
162+
"input": "Navigate to google.com",
163+
"url": "about:blank",
164+
"wait_timeout": 5000,
165+
"model": {
166+
"main_model": {"provider": "openai", "model": "gpt-4", "api_key": "sk-..."}
167+
}
168+
}'
169+
```
170+
171+
For more details on the Agent Server API, see `../agent-server/README.md`.
172+
99173
## Advanced Usage
100174

101175
### Development Mode

0 commit comments

Comments
 (0)