From ba400812847166c9d40768db6c30255a2861cc2d Mon Sep 17 00:00:00 2001
From: MarBeanAI <stevensav36@gmail.com>
Date: Sun, 27 Oct 2024 21:46:21 -0500
Subject: [PATCH] update dependencies, add activation script, and enhance
 system prompt

---
 .vscode/settings.json |  10 ++
 activate.sh           |  14 ++
 loop.py               | 262 +++++++++++++++++++++++++++-------
 requirements.txt      |  13 +-
 streamlit.py          | 323 +++++++++++++++++++++++++++++++-----------
 tools/computer.py     | 158 +++++++++++++--------
 6 files changed, 583 insertions(+), 197 deletions(-)
 create mode 100644 .vscode/settings.json
 create mode 100755 activate.sh
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 00000000..e085682f
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,10 @@
+{
+  "inlineChat.finishOnType": true,
+  "inlineChat.experimental.enableZoneToolbar": true,
+  "accessibility.voice.keywordActivation": "chatInContext",
+  "github.copilot.chat.experimental.inlineChatCompletionTrigger.enabled": true,
+  "github.copilot.chat.experimental.inlineChatHint.enabled": true,
+  "gitlens.ai.experimental.model": "anthropic:claude-3-5-sonnet-20240620",
+  "gitlens.ai.experimental.openai.url": "",
+  "diffEditor.codeLens": true
+}
diff --git a/activate.sh b/activate.sh
new file mode 100755
index 00000000..377e75aa
--- /dev/null
+++ b/activate.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Usage: source activate.sh  (running ./activate.sh only changes a subshell)
+source venv/bin/activate || { echo "Error: cannot load venv/bin/activate" >&2; return 1 2>/dev/null || exit 1; }
+export PYTHONPATH="${PYTHONPATH:+$PYTHONPATH:}$(pwd)"
+echo "Virtual environment activated!"
+echo ""
+echo "To start the application:"
+echo "1. Set your API key:"
+echo "   export ANTHROPIC_API_KEY=your_api_key_here"
+echo "2. Set display dimensions (recommended):"
+echo "   export WIDTH=1280"
+echo "   export HEIGHT=800"
+echo "3. Run the Streamlit app:"
+echo "   streamlit run streamlit.py"
diff --git a/loop.py b/loop.py
index 887eeb6c..85768020 100644
--- a/loop.py
+++ b/loop.py
@@ -31,69 +31,222 @@ class APIProvider(StrEnum):
     ANTHROPIC = "anthropic"
     BEDROCK = "bedrock"
     VERTEX = "vertex"
+    BRICKS = "bricks"
 
 
 PROVIDER_TO_DEFAULT_MODEL_NAME: dict[APIProvider, str] = {
     APIProvider.ANTHROPIC: "claude-3-5-sonnet-20241022",
     APIProvider.BEDROCK: "anthropic.claude-3-5-sonnet-20241022-v2:0",
     APIProvider.VERTEX: "claude-3-5-sonnet-v2@20241022",
+    APIProvider.BRICKS: "claude-3-5-sonnet-20241022",
 }
 
-
 # This system prompt is optimized for the Docker environment in this repository and
 # specific tool combinations enabled.
 # We encourage modifying this system prompt to ensure the model has context for the
 # environment it is running in, and to provide any additional information that may be
 # helpful for the task at hand.
-SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
-* You are utilizing a macOS Sonoma 15.7 environment using {platform.machine()} architecture with internet access.
-* You can install applications using homebrew with your bash tool. Use curl instead of wget.
-* To open Chrome, please just click on the Chrome icon in the Dock or use Spotlight.
-* Using bash tool you can start GUI applications. GUI apps can be launched directly or with `open -a "Application Name"`. GUI apps will appear natively within macOS, but they may take some time to appear. Take a screenshot to confirm it did.
-* When using your bash tool with commands that are expected to output very large quantities of text, redirect into a tmp file and use str_replace_editor or `grep -n -B <lines before> -A <lines after> <query> <filename>` to confirm output.
-* When viewing a page it can be helpful to zoom out so that you can see everything on the page. In Chrome, use Command + "-" to zoom out or Command + "+" to zoom in.
-* When using your computer function calls, they take a while to run and send back to you. Where possible/feasible, try to chain multiple of these calls all into one function calls request.
-* The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
-</SYSTEM_CAPABILITY>
-<IMPORTANT>
-* When using Chrome, if any first-time setup dialogs appear, IGNORE THEM. Instead, click directly in the address bar and enter the appropriate search term or URL there.
-* If the item you are looking at is a pdf, if after taking a single screenshot of the pdf it seems that you want to read the entire document instead of trying to continue to read the pdf from your screenshots + navigation, determine the URL, use curl to download the pdf, install and use pdftotext (available via homebrew) to convert it to a text file, and then read that text file directly with your StrReplaceEditTool.
-</IMPORTANT>"""
-# SYSTEM_PROMPT = f"""<SYSTEM_CAPABILITY>
-# * You are utilizing a macOS Sonoma 15.7 environment using {platform.machine()} architecture with command line internet access.
-# * Package management:
-#   - Use homebrew for package installation
-#   - Use curl for HTTP requests
-#   - Use npm/yarn for Node.js packages
-#   - Use pip for Python packages
-
-# * Browser automation available via Playwright:
-#   - Supports Chrome, Firefox, and WebKit
-#   - Can handle JavaScript-heavy applications
-#   - Capable of screenshots, navigation, and interaction
-#   - Handles dynamic content loading
-
-# * System automation:
-#   - cliclick for simulating mouse/keyboard input
-#   - osascript for AppleScript commands
-#   - launchctl for managing services
-#   - defaults for reading/writing system preferences
-
-# * Development tools:
-#   - Standard Unix/Linux command line utilities
-#   - Git for version control
-#   - Docker for containerization
-#   - Common build tools (make, cmake, etc.)
-
-# * Output handling:
-#   - For large output, redirect to tmp files: command > /tmp/output.txt
-#   - Use grep with context: grep -n -B <before> -A <after> <query> <filename>
-#   - Stream processing with awk, sed, and other text utilities
-
-# * Note: Command line function calls may have latency. Chain multiple operations into single requests where feasible.
-
-# * The current date is {datetime.today().strftime('%A, %B %-d, %Y')}.
-# </SYSTEM_CAPABILITY>"""
+SYSTEM_PROMPT = f"""<SYSTEM_DEFINITION>
+You are an advanced AI assistant operating within a macOS Sequoia Version 15.1 (24B82) environment with comprehensive access to system resources and applications. Your purpose is to provide precise, efficient assistance while leveraging available tools optimally.
+
+<SYSTEM_SPECIFICATIONS>
+1. Hardware Configuration:
+   - Model: MacBook Pro (15-inch, 2018)
+   - Processor: 2.6 GHz 6-Core Intel Core i7
+   - Memory: 16 GB 2400 MHz DDR4
+   - Graphics: Intel UHD Graphics 630 1536 MB
+   - Display: 15.4-inch Retina (2880 × 1800)
+   - Architecture: {platform.machine()}
+   - Internet: Active connection available
+   - Time Zone: System configured
+   - Current Date: {datetime.today().strftime('%A, %B %-d, %Y')}
+
+<APPLICATION_ECOSYSTEM>
+1. Development Environment:
+   A. Code Editors & IDEs:
+      - Visual Studio Code & VS Code Insiders
+      - Xcode Beta
+      - Sublime Text
+      - Adobe Dreamweaver 2021
+      
+   B. Version Control & Collaboration:
+      - GitHub Desktop
+      - Git (command line)
+      - CodeForces Web Tool
+      
+   C. Container & Virtual Environments:
+      - Docker.app
+      - Docker CLI tools
+      
+   D. Development Tools:
+      - Terminal
+      - Command Line Tools
+      - Developer.app
+
+2. Professional Suites:
+   A. Microsoft Office:
+      - Word
+      - Excel
+      - PowerPoint
+      - OneNote
+      - Outlook
+      
+   B. Adobe Creative Cloud:
+      - Creative Cloud Manager
+      - Dreamweaver 2021
+      - Premiere Pro (Beta)
+      - Adobe UXP Developer Tools
+
+3. Web Browsers & Tools:
+   A. Primary Browsers:
+      - Safari & Safari Technology Preview
+      - Google Chrome Beta
+      - Firefox
+      - Microsoft Edge Dev
+      - Chromium
+      
+   B. Specialized Browsers:
+      - Tor Browser (Standard & Alpha)
+      
+   C. Browser Extensions:
+      - Grammarly for Safari
+      - Microsoft Bi for Safari
+
+4. AI & Machine Learning Tools:
+   - NVIDIA AI Workbench
+   - Code AI
+   - AI on Device (MacOS)
+   - 16x Prompt.app
+
+5. System Utilities:
+   A. File Management:
+      - Finder
+      - Preview
+      - The Unarchiver
+      - Unzip - RAR
+      
+   B. System Tools:
+      - System Settings
+      - Automator
+      - Mission Control
+      - Time Machine
+      - Activity Monitor
+      
+   C. Text Processing:
+      - TextEdit
+      - Notes
+      
+   D. Security:
+      - Passwords.app
+      - G Authenticator
+      - BitPay
+      - Wasabi Wallet
+
+6. Communication & Collaboration:
+   - Messages
+   - Mail
+   - FaceTime
+   - Discord
+   - Zoom
+   - Messenger
+   - TextNow
+
+7. Media & Entertainment:
+   - QuickTime Player
+   - Photos
+   - Music
+   - TV
+   - Podcasts
+   - Photo Booth
+
+8. Productivity & Organization:
+   - Calendar
+   - Reminders
+   - Stickies
+   - Clock
+   - Calculator
+   - Weather
+   - Maps
+
+<OPERATIONAL_CAPABILITIES>
+1. File System Access:
+   - Read/Write operations in user directories
+   - Application data access
+   - Temporary file creation
+   - Archive handling
+
+2. Network Operations:
+   - HTTP/HTTPS requests
+   - API interactions
+   - Download capabilities
+   - Network diagnostics
+
+3. Automation Framework:
+   A. System Automation:
+      - Shortcuts.app
+      - Automator workflows
+      - AppleScript execution
+      - Shell scripting
+      
+   B. Development Automation:
+      - Build tools
+      - Package managers
+      - Deployment scripts
+
+4. Security Protocols:
+   - Secure file operations
+   - Credential management
+   - Encryption capabilities
+   - Privacy controls
+
+<PERFORMANCE_GUIDELINES>
+1. Resource Management:
+   - Monitor system resources
+   - Optimize heavy operations
+   - Cache management
+   - Background process awareness
+
+2. Error Handling:
+   - Graceful failure recovery
+   - User feedback
+   - Logging capabilities
+   - Debug information
+
+3. Operation Chaining:
+   - Minimize command calls
+   - Batch operations
+   - Efficient workflows
+   - Resource pooling
+
+<INTERACTION_PROTOCOL>
+For each user interaction, I will:
+1. Analyze request requirements
+2. Identify optimal tools/applications
+3. Validate resource availability
+4. Plan execution strategy
+5. Provide clear documentation
+6. Monitor execution
+7. Handle errors gracefully
+8. Confirm successful completion
+
+<RESPONSE_FORMAT>
+Each response will include:
+1. <thinking> tags for analysis
+2. Task acknowledgment
+3. Resource identification
+4. Step-by-step execution plan
+5. Clear documentation
+6. Error handling procedures
+7. Success confirmation
+
+<LIMITATIONS_AWARENESS>
+- Respect system permissions
+- Handle resource constraints
+- Consider operation timing
+- Maintain security protocols
+- Preserve user privacy
+- Account for network latency"""
+
 
 async def sampling_loop(
     *,
@@ -106,7 +259,7 @@ async def sampling_loop(
     api_response_callback: Callable[[APIResponse[BetaMessage]], None],
     api_key: str,
     only_n_most_recent_images: int | None = None,
-    max_tokens: int = 4096,
+    max_tokens: int = 8192,
 ):
     """
     Agentic sampling loop for the assistant/tool interaction of computer use.
@@ -130,6 +283,13 @@ async def sampling_loop(
             client = AnthropicVertex()
         elif provider == APIProvider.BEDROCK:
             client = AnthropicBedrock()
+        elif provider == APIProvider.BRICKS:
+            client = Anthropic(
+                api_key=api_key,
+                base_url="https://api.trybricks.ai/api/providers/anthropic",
+            )
+        else:
+            raise ValueError(f"Unsupported provider: {provider}")
 
         # Call the API
         # we use raw_response to provide debug information to streamlit. Your
diff --git a/requirements.txt b/requirements.txt
index 7457160b..f01e98fb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,7 +1,8 @@
-streamlit>=1.38.0
-anthropic[bedrock,vertex]>=0.37.1
-jsonschema==4.22.0
-boto3>=1.28.57
-google-auth<3,>=2
-python-dotenv>=1.0.1
+anthropic==0.37.1
+python-dotenv>=1.0.0
+streamlit>=1.28.0
 pyautogui>=0.9.54
+keyboard>=0.13.5
+boto3>=1.29.0
+google-auth>=2.23.4
+Pillow>=10.0.0
diff --git a/streamlit.py b/streamlit.py
index a57a5607..9a095b92 100644
--- a/streamlit.py
+++ b/streamlit.py
@@ -10,30 +10,33 @@
 from enum import StrEnum
 from functools import partial
 from pathlib import PosixPath
-from typing import cast
+from typing import cast, Any
+import json
 
-import streamlit as st
 from anthropic import APIResponse
-from anthropic.types import (
-    TextBlock,
-)
-from anthropic.types.beta import BetaMessage, BetaTextBlock, BetaToolUseBlock
+from anthropic.types import Message
+from anthropic.types.beta import BetaMessage, BetaToolUseBlock
 from anthropic.types.tool_use_block import ToolUseBlock
-from streamlit.delta_generator import DeltaGenerator
+from dotenv import load_dotenv
 
+import streamlit as st
+from streamlit.components.v1 import html
 from loop import (
     PROVIDER_TO_DEFAULT_MODEL_NAME,
     APIProvider,
     sampling_loop,
 )
+from streamlit.delta_generator import DeltaGenerator
 from tools import ToolResult
-from dotenv import load_dotenv
 
 load_dotenv()
 
+# Locations of persisted settings (the API key file lives under ~/.anthropic).
 
 CONFIG_DIR = PosixPath("~/.anthropic").expanduser()
 API_KEY_FILE = CONFIG_DIR / "api_key"
+
+# Custom CSS, HTML and JavaScript for styling, mouse tracking and animations
 STREAMLIT_STYLE = """
 <style>
     /* Hide chat input while agent loop is running */
@@ -41,11 +44,105 @@
     .stApp[data-test-script-state=running] .stChatInput textarea {
         display: none;
     }
-     /* Hide the streamlit deploy button */
+    /* Hide the streamlit deploy button */
     .stDeployButton {
         visibility: hidden;
     }
+    /* Mouse tracker styles */
+    #mouse-tracker {
+        position: fixed;
+        width: 20px;
+        height: 20px;
+        background: rgba(255, 0, 0, 0.3);
+        border-radius: 50%;
+        pointer-events: none;
+        z-index: 9999;
+        transition: all 0.1s ease;
+        display: none;
+    }
+    /* Click animation */
+    .click-animation {
+        position: fixed;
+        width: 40px;
+        height: 40px;
+        border: 2px solid red;
+        border-radius: 50%;
+        pointer-events: none;
+        z-index: 9998;
+        animation: clickRipple 0.5s ease-out;
+    }
+    @keyframes clickRipple {
+        0% {
+            transform: scale(0.5);
+            opacity: 1;
+        }
+        100% {
+            transform: scale(1.5);
+            opacity: 0;
+        }
+    }
+    /* Auto scroll container */
+    .chat-container {
+        height: calc(100vh - 200px);
+        overflow-y: auto;
+        scroll-behavior: smooth;
+    }
 </style>
+
+<div id="mouse-tracker"></div>
+
+<script>
+    // Mouse tracking
+    const tracker = document.getElementById('mouse-tracker');
+    let lastX = 0;
+    let lastY = 0;
+    
+    function updateMousePosition(x, y) {
+        tracker.style.display = 'block';
+        tracker.style.left = x + 'px';
+        tracker.style.top = y + 'px';
+        lastX = x;
+        lastY = y;
+    }
+    
+    // Click animation
+    function createClickAnimation(x, y) {
+        const clickEffect = document.createElement('div');
+        clickEffect.className = 'click-animation';
+        clickEffect.style.left = (x - 20) + 'px';
+        clickEffect.style.top = (y - 20) + 'px';
+        document.body.appendChild(clickEffect);
+        
+        setTimeout(() => {
+            clickEffect.remove();
+        }, 500);
+    }
+    
+    // User controls toggle
+    let controlsEnabled = true;
+    document.addEventListener('keydown', (e) => {
+        if (e.key === ' ' && e.metaKey) {  // Space + Cmd
+            controlsEnabled = !controlsEnabled;
+            const event = new CustomEvent('controlsToggle', { detail: controlsEnabled });
+            window.dispatchEvent(event);
+        }
+    });
+    
+    // Auto scroll
+    function scrollToBottom() {
+        const container = document.querySelector('.chat-container');
+        if (container) {
+            container.scrollTop = container.scrollHeight;
+        }
+    }
+    
+    // Expose functions to Python
+    window.streamlitFunctions = {
+        updateMousePosition,
+        createClickAnimation,
+        scrollToBottom
+    };
+</script>
 """
 
 WARNING_TEXT = ""
@@ -58,16 +155,16 @@ class Sender(StrEnum):
 
 
 def setup_state():
+    """Initialize session state variables"""
     if "messages" not in st.session_state:
         st.session_state.messages = []
     if "api_key" not in st.session_state:
-        # Try to load API key from file first, then environment
         st.session_state.api_key = load_from_storage("api_key") or os.getenv(
             "ANTHROPIC_API_KEY", ""
         )
     if "provider" not in st.session_state:
         st.session_state.provider = (
-            os.getenv("API_PROVIDER", "anthropic") or APIProvider.ANTHROPIC
+            os.getenv("API_PROVIDER", "bricks") or APIProvider.BRICKS
         )
     if "provider_radio" not in st.session_state:
         st.session_state.provider_radio = st.session_state.provider
@@ -85,6 +182,8 @@ def setup_state():
         st.session_state.custom_system_prompt = load_from_storage("system_prompt") or ""
     if "hide_images" not in st.session_state:
         st.session_state.hide_images = False
+    if "controls_enabled" not in st.session_state:
+        st.session_state.controls_enabled = True
 
 
 def _reset_model():
@@ -93,6 +192,10 @@ def _reset_model():
     ]
 
 
+def toggle_controls() -> None:  # on_click callback of the "Toggle Controls" button
+    st.session_state.controls_enabled = not st.session_state.controls_enabled
+
+
 async def main():
     """Render loop for streamlit"""
     setup_state()
@@ -101,7 +204,18 @@ async def main():
 
     st.title("Claude Computer Use for Mac")
 
-    st.markdown("""This is from [Mac Computer Use](https://github.com/deedy/mac_computer_use), a fork of [Anthropic Computer Use](https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/README.md) to work natively on Mac.""")
+    # User controls toggle button (only visible to users)
+    col1, col2 = st.columns([3, 1])
+    with col2:
+        st.button("Toggle Controls (⌘ + Space)", on_click=toggle_controls)
+        if st.session_state.controls_enabled:
+            st.success("Controls Enabled")
+        else:
+            st.error("Controls Disabled")
+
+    st.markdown(
+        """This is from [Mac Computer Use](https://github.com/deedy/mac_computer_use), a fork of [Anthropic Computer Use](https://github.com/anthropics/anthropic-quickstarts/blob/main/computer-use-demo/README.md) to work natively on Mac."""
+    )
 
     with st.sidebar:
 
@@ -122,9 +236,14 @@ def _reset_api_provider():
 
         st.text_input("Model", key="model")
 
-        if st.session_state.provider == APIProvider.ANTHROPIC:
+        if st.session_state.provider in [APIProvider.ANTHROPIC, APIProvider.BRICKS]:
+            api_key_label = (
+                "BricksAI Secret Key"
+                if st.session_state.provider == APIProvider.BRICKS
+                else "Anthropic API Key"
+            )
             st.text_input(
-                "Anthropic API Key",
+                api_key_label,
                 type="password",
                 key="api_key",
                 on_change=lambda: save_to_storage("api_key", st.session_state.api_key),
@@ -151,7 +270,7 @@ def _reset_api_provider():
                 st.session_state.clear()
                 setup_state()
 
-                subprocess.run("pkill Xvfb; pkill tint2", shell=True)  # noqa: ASYNC221
+                subprocess.run("pkill Xvfb; pkill tint2", shell=True, check=False)  # pkill exits 1 when nothing matched  # noqa: ASYNC221
                 await asyncio.sleep(1)
                 subprocess.run("./start_all.sh", shell=True)  # noqa: ASYNC221
 
@@ -170,72 +289,84 @@ def _reset_api_provider():
     )
 
     with chat:
-        # render past chats
-        for message in st.session_state.messages:
-            if isinstance(message["content"], str):
-                _render_message(message["role"], message["content"])
-            elif isinstance(message["content"], list):
-                for block in message["content"]:
-                    # the tool result we send back to the Anthropic API isn't sufficient to render all details,
-                    # so we store the tool use responses
-                    if isinstance(block, dict) and block["type"] == "tool_result":
-                        _render_message(
-                            Sender.TOOL, st.session_state.tools[block["tool_use_id"]]
-                        )
-                    else:
-                        _render_message(
-                            message["role"],
-                            cast(BetaTextBlock | BetaToolUseBlock, block),
-                        )
-
-        # render past http exchanges
-        for identity, response in st.session_state.responses.items():
-            _render_api_response(response, identity, http_logs)
-
-        # render past chats
-        if new_message:
-            st.session_state.messages.append(
-                {
-                    "role": Sender.USER,
-                    "content": [TextBlock(type="text", text=new_message)],
-                }
-            )
-            _render_message(Sender.USER, new_message)
-
-        try:
-            most_recent_message = st.session_state["messages"][-1]
-        except IndexError:
-            return
-
-        if most_recent_message["role"] is not Sender.USER:
-            # we don't have a user message to respond to, exit early
-            return
-
-        with st.spinner("Running Agent..."):
-            # run the agent sampling loop with the newest message
-            st.session_state.messages = await sampling_loop(
-                system_prompt_suffix=st.session_state.custom_system_prompt,
-                model=st.session_state.model,
-                provider=st.session_state.provider,
-                messages=st.session_state.messages,
-                output_callback=partial(_render_message, Sender.BOT),
-                tool_output_callback=partial(
-                    _tool_output_callback, tool_state=st.session_state.tools
-                ),
-                api_response_callback=partial(
-                    _api_response_callback,
-                    tab=http_logs,
-                    response_state=st.session_state.responses,
-                ),
-                api_key=st.session_state.api_key,
-                only_n_most_recent_images=st.session_state.only_n_most_recent_images,
-            )
+        # Create a container for auto-scrolling
+        chat_container = st.container()
+        with chat_container:
+            # render past chats
+            for message in st.session_state.messages:
+                if isinstance(message["content"], str):
+                    _render_message(message["role"], message["content"])
+                elif isinstance(message["content"], list):
+                    for block in message["content"]:
+                        if isinstance(block, dict) and block["type"] == "tool_result":
+                            _render_message(
+                                Sender.TOOL,
+                                st.session_state.tools[block["tool_use_id"]],
+                            )
+                        else:
+                            _render_message(
+                                message["role"],
+                                cast(Any, block),
+                            )
+
+            # render past http exchanges
+            for identity, response in st.session_state.responses.items():
+                _render_api_response(response, identity, http_logs)
+
+            # render new message
+            if new_message:
+                st.session_state.messages.append(
+                    {
+                        "role": Sender.USER,
+                        "content": [{"type": "text", "text": new_message}],
+                    }
+                )
+                _render_message(Sender.USER, new_message)
+
+            try:
+                most_recent_message = st.session_state["messages"][-1]
+            except IndexError:
+                return
+
+            if most_recent_message["role"] is not Sender.USER:
+                return
+
+            with st.spinner("Running Agent..."):
+                st.session_state.messages = await sampling_loop(
+                    system_prompt_suffix=st.session_state.custom_system_prompt,
+                    model=st.session_state.model,
+                    provider=st.session_state.provider,
+                    messages=st.session_state.messages,
+                    output_callback=partial(_render_message, Sender.BOT),
+                    tool_output_callback=partial(
+                        _tool_output_callback, tool_state=st.session_state.tools
+                    ),
+                    api_response_callback=partial(
+                        _api_response_callback,
+                        tab=http_logs,
+                        response_state=st.session_state.responses,
+                    ),
+                    api_key=st.session_state.api_key,
+                    only_n_most_recent_images=st.session_state.only_n_most_recent_images,
+                )
+
+        # Auto scroll after rendering
+        html("""
+            <script>
+                window.streamlitFunctions.scrollToBottom();
+            </script>
+        """)
 
 
 def validate_auth(provider: APIProvider, api_key: str | None):
-    if provider == APIProvider.ANTHROPIC:
+    if provider in [APIProvider.ANTHROPIC, APIProvider.BRICKS]:
         if not api_key:
-            return "Enter your Anthropic API key in the sidebar to continue."
+            key_type = (
+                "BricksAI Secret Key"
+                if provider == APIProvider.BRICKS
+                else "Anthropic API Key"
+            )
+            return f"Enter your {key_type} in the sidebar to continue."
     if provider == APIProvider.BEDROCK:
         import boto3
 
@@ -285,9 +416,7 @@ def _api_response_callback(
     tab: DeltaGenerator,
     response_state: dict[str, APIResponse[BetaMessage]],
 ):
-    """
-    Handle an API response by storing it to state and rendering it.
-    """
+    """Handle an API response by storing it to state and rendering it."""
     response_id = datetime.now().isoformat()
     response_state[response_id] = response
     _render_api_response(response, response_id, tab)
@@ -300,6 +429,33 @@ def _tool_output_callback(
     tool_state[tool_id] = tool_output
     _render_message(Sender.TOOL, tool_output)
 
+    # Update mouse tracker for mouse movements
+    if hasattr(tool_output, "output") and "cliclick m:" in str(tool_output.output):
+        coords = str(tool_output.output).split("m:")[1].strip().split(",")
+        if len(coords) == 2:
+            html(f"""
+                <script>
+                    window.streamlitFunctions.updateMousePosition({coords[0]}, {coords[1]});
+                </script>
+            """)
+
+    # Show click animation for clicks
+    if hasattr(tool_output, "output") and any(
+        cmd in str(tool_output.output) for cmd in ["c:", "rc:", "dc:", "mc:"]
+    ):
+        # Get current mouse position from tracker
+        html("""
+            <script>
+                const tracker = document.getElementById('mouse-tracker');
+                if (tracker) {
+                    window.streamlitFunctions.createClickAnimation(
+                        parseInt(tracker.style.left),
+                        parseInt(tracker.style.top)
+                    );
+                }
+            </script>
+        """)
+
 
 def _render_api_response(
     response: APIResponse[BetaMessage], response_id: str, tab: DeltaGenerator
@@ -320,10 +476,9 @@ def _render_api_response(
 
 def _render_message(
     sender: Sender,
-    message: str | BetaTextBlock | BetaToolUseBlock | ToolResult,
+    message: str | dict | BetaToolUseBlock | ToolResult,
 ):
     """Convert input from the user or output from the agent to a streamlit message."""
-    # streamlit's hotreloading breaks isinstance checks, so we need to check for class names
     is_tool_result = not isinstance(message, str) and (
         isinstance(message, ToolResult)
         or message.__class__.__name__ == "ToolResult"
@@ -348,8 +503,8 @@ def _render_message(
                 st.error(message.error)
             if message.base64_image and not st.session_state.hide_images:
                 st.image(base64.b64decode(message.base64_image))
-        elif isinstance(message, BetaTextBlock) or isinstance(message, TextBlock):
-            st.write(message.text)
+        elif isinstance(message, dict) and message.get("type") == "text":
+            st.write(message.get("text", ""))
         elif isinstance(message, BetaToolUseBlock) or isinstance(message, ToolUseBlock):
             st.code(f"Tool Use: {message.name}\nInput: {message.input}")
         else:
diff --git a/tools/computer.py b/tools/computer.py
index 0e7646fb..35655496 100644
--- a/tools/computer.py
+++ b/tools/computer.py
@@ -2,22 +2,26 @@
 import base64
 import os
 import shlex
-import pyautogui
-import keyboard
 from enum import StrEnum
 from pathlib import Path
 from typing import Literal, TypedDict
 from uuid import uuid4
+from io import BytesIO
+from PIL import Image
 
 from anthropic.types.beta import BetaToolComputerUse20241022Param
 
 from .base import BaseAnthropicTool, ToolError, ToolResult
 from .run import run
 
+# Constants
 OUTPUT_DIR = "/tmp/outputs"
-
 TYPING_DELAY_MS = 12
 TYPING_GROUP_SIZE = 50
+MAX_IMAGE_SIZE = 5 * 1024 * 1024  # 5MB in bytes
+
+# Check if we're running in a codespace environment
+IS_CODESPACE = os.environ.get("CODESPACES") == "true"
 
 Action = Literal[
     "key",
@@ -63,11 +67,30 @@ def chunks(s: str, chunk_size: int) -> list[str]:
     return [s[i : i + chunk_size] for i in range(0, len(s), chunk_size)]
 
 
+def compress_image(image_data: bytes, max_size: int = MAX_IMAGE_SIZE) -> bytes:
+    """Re-encode image data as an optimized (maximum-compression) PNG.
+
+    PNG is lossless and Pillow's PNG writer has no ``quality`` option, so
+    retrying at lower quality levels only repeats the same slow encode and
+    yields identical bytes. The image is therefore encoded exactly once.
+
+    Args:
+        image_data: Encoded image bytes in any format Pillow can open.
+        max_size: Desired upper bound in bytes. Best effort only: the result
+            can still exceed it, because pixels are never dropped or rescaled.
+
+    Returns:
+        The PNG-encoded image bytes.
+    """
+    output = BytesIO()
+    Image.open(BytesIO(image_data)).save(output, format="PNG", optimize=True)
+    return output.getvalue()
+
+
 class ComputerTool(BaseAnthropicTool):
     """
-    A tool that allows the agent to interact with the screen, keyboard, and mouse of the current macOS computer.
+    A tool that allows the agent to interact with the screen, keyboard, and mouse.
     The tool parameters are defined by Anthropic and are not editable.
-    Requires cliclick to be installed: brew install cliclick
     """
 
     name: Literal["computer"] = "computer"
@@ -76,7 +99,7 @@ class ComputerTool(BaseAnthropicTool):
     height: int
     display_num: int | None
 
-    _screenshot_delay = 1.0  # macOS is generally faster than X11
+    _screenshot_delay = 1.0
     _scaling_enabled = True
 
     @property
@@ -93,9 +116,13 @@ def to_params(self) -> BetaToolComputerUse20241022Param:
     def __init__(self):
         super().__init__()
 
-        self.width, self.height = pyautogui.size()
-        assert self.width and self.height, "WIDTH, HEIGHT must be set"
-        self.display_num = None  # macOS doesn't use X11 display numbers
+        # Set default dimensions
+        self.width = int(os.environ.get("WIDTH", 1366))
+        self.height = int(os.environ.get("HEIGHT", 768))
+        self.display_num = None
+
+        if IS_CODESPACE:
+            print("Running in codespace environment - some features may be limited")
 
     async def __call__(
         self,
@@ -106,6 +133,12 @@ async def __call__(
         **kwargs,
     ):
         print("Action: ", action, text, coordinate)
+
+        if IS_CODESPACE:
+            return ToolResult(
+                error="This action is not supported in codespace environment. This tool is designed for macOS systems."
+            )
+
         if action in ("mouse_move", "left_click_drag"):
             if coordinate is None:
                 raise ToolError(f"coordinate is required for {action}")
@@ -116,7 +149,9 @@ async def __call__(
             if not all(isinstance(i, int) and i >= 0 for i in coordinate):
                 raise ToolError(f"{coordinate} must be a tuple of non-negative ints")
 
-            x, y = self.scale_coordinates(ScalingSource.API, coordinate[0], coordinate[1])
+            x, y = self.scale_coordinates(
+                ScalingSource.API, coordinate[0], coordinate[1]
+            )
 
             if action == "mouse_move":
                 return await self.shell(f"cliclick m:{x},{y}")
@@ -129,45 +164,44 @@ async def __call__(
             if coordinate is not None:
                 raise ToolError(f"coordinate is not accepted for {action}")
             if not isinstance(text, str):
-                raise ToolError(output=f"{text} must be a string")
+                raise ToolError("Text input must be a string")
 
             if action == "key":
-                # Convert common key names to pyautogui format
+                # Use cliclick for key presses
                 key_map = {
-                    "Return": "enter",
-                    "space": "space",
-                    "Tab": "tab",
-                    "Left": "left",
-                    "Right": "right",
-                    "Up": "up",
-                    "Down": "down",
-                    "Escape": "esc",
-                    "command": "command",
-                    "cmd": "command",
-                    "alt": "alt",
-                    "shift": "shift",
-                    "ctrl": "ctrl"
+                    "Return": "kp:return",
+                    "space": "kp:space",
+                    "Tab": "kp:tab",
+                    "Left": "kp:arrow-left",
+                    "Right": "kp:arrow-right",
+                    "Up": "kp:arrow-up",
+                    "Down": "kp:arrow-down",
+                    "Escape": "kp:esc",
+                    "command": "kp:cmd",
+                    "cmd": "kp:cmd",
+                    "alt": "kp:alt",
+                    "shift": "kp:shift",
+                    "ctrl": "kp:ctrl",
                 }
 
                 try:
                     if "+" in text:
                         # Handle combinations like "ctrl+c"
                         keys = text.split("+")
-                        mapped_keys = [key_map.get(k.strip(), k.strip()) for k in keys]
-                        await asyncio.get_event_loop().run_in_executor(
-                            None, keyboard.press_and_release, '+'.join(mapped_keys)
-                        )
+                        mapped_keys = [
+                            key_map.get(k.strip(), f"kp:{k.strip()}") for k in keys
+                        ]
+                        cmd = "cliclick " + " ".join(mapped_keys)
                     else:
                         # Handle single keys
-                        mapped_key = key_map.get(text, text)
-                        await asyncio.get_event_loop().run_in_executor(
-                            None, keyboard.press_and_release, mapped_key
-                        )
+                        mapped_key = key_map.get(text, f"kp:{text}")
+                        cmd = f"cliclick {mapped_key}"
 
-                    return ToolResult(output=f"Pressed key: {text}", error=None, base64_image=None)
+                    return await self.shell(cmd)
 
                 except Exception as e:
-                    return ToolResult(output=None, error=str(e), base64_image=None)
+                    return ToolResult(error=str(e))
+
             elif action == "type":
                 results: list[ToolResult] = []
                 for chunk in chunks(text, TYPING_GROUP_SIZE):
@@ -200,7 +234,6 @@ async def __call__(
                     "cliclick p",
                     take_screenshot=False,
                 )
-                import pdb; pdb.set_trace()
                 if result.output:
                     x, y = map(int, result.output.strip().split(","))
                     x, y = self.scale_coordinates(ScalingSource.COMPUTER, x, y)
@@ -219,26 +252,35 @@ async def __call__(
 
     async def screenshot(self):
         """Take a screenshot of the current screen and return the base64 encoded image."""
+        if IS_CODESPACE:
+            return ToolResult(
+                error="Screenshot functionality is not available in codespace environment"
+            )
+
         output_dir = Path(OUTPUT_DIR)
         output_dir.mkdir(parents=True, exist_ok=True)
         path = output_dir / f"screenshot_{uuid4().hex}.png"
 
-        # Use macOS native screencapture
-        screenshot_cmd = f"screencapture -x {path}"
-        result = await self.shell(screenshot_cmd, take_screenshot=False)
+        try:
+            # Use screencapture on macOS
+            result = await self.shell(f"screencapture -x {path}")
+            if result.error:
+                return result
 
-        if self._scaling_enabled:
-            x, y = SCALE_DESTINATION['width'], SCALE_DESTINATION['height']
-            await self.shell(
-                f"sips -z {y} {x} {path}",  # sips is macOS native image processor
-                take_screenshot=False
-            )
-
-        if path.exists():
-            return result.replace(
-                base64_image=base64.b64encode(path.read_bytes()).decode()
-            )
-        raise ToolError(f"Failed to take screenshot: {result.error}")
+            if path.exists():
+                # Read the image and compress if necessary
+                image_data = path.read_bytes()
+                if len(image_data) > MAX_IMAGE_SIZE:
+                    image_data = compress_image(image_data)
+
+                return ToolResult(base64_image=base64.b64encode(image_data).decode())
+            return ToolResult(error="Screenshot file was not created")
+        except Exception as e:
+            return ToolResult(error=f"Failed to take screenshot: {str(e)}")
+        finally:
+            # Clean up the temporary file
+            if path.exists():
+                path.unlink()
 
     async def shell(self, command: str, take_screenshot=False) -> ToolResult:
         """Run a shell command and return the output, error, and optionally a screenshot."""
@@ -252,7 +294,9 @@ async def shell(self, command: str, take_screenshot=False) -> ToolResult:
 
         return ToolResult(output=stdout, error=stderr, base64_image=base64_image)
 
-    def scale_coordinates(self, source: ScalingSource, x: int, y: int) -> tuple[int, int]:
+    def scale_coordinates(
+        self, source: ScalingSource, x: int, y: int
+    ) -> tuple[int, int]:
         """
         Scale coordinates between original resolution and target resolution (SCALE_DESTINATION).
 
@@ -268,13 +312,15 @@ def scale_coordinates(self, source: ScalingSource, x: int, y: int) -> tuple[int,
             return x, y
 
         # Calculate scaling factors
-        x_scaling_factor = SCALE_DESTINATION['width'] / self.width
-        y_scaling_factor = SCALE_DESTINATION['height'] / self.height
+        x_scaling_factor = SCALE_DESTINATION["width"] / self.width
+        y_scaling_factor = SCALE_DESTINATION["height"] / self.height
 
         if source == ScalingSource.API:
             # Scale up from SCALE_DESTINATION to original resolution
-            if x > SCALE_DESTINATION['width'] or y > SCALE_DESTINATION['height']:
-                raise ToolError(f"Coordinates {x}, {y} are out of bounds for {SCALE_DESTINATION['width']}x{SCALE_DESTINATION['height']}")
+            if x > SCALE_DESTINATION["width"] or y > SCALE_DESTINATION["height"]:
+                raise ToolError(
+                    f"Coordinates {x}, {y} are out of bounds for {SCALE_DESTINATION['width']}x{SCALE_DESTINATION['height']}"
+                )
             return round(x / x_scaling_factor), round(y / y_scaling_factor)
         else:
             # Scale down from original resolution to SCALE_DESTINATION