PallavAg · yordyi · Oct 29, 2024
diff --git a/README.md b/README.md
@@ -64,3 +64,67 @@ You can quit the script at any time by pressing `Ctrl+C` in the terminal.
 > [!CAUTION]
 > - **Security Risks:** This script allows claude to control your computer's mouse and keyboard and run bash commands. Use it at your own risk.
 > - **Responsibility:** By running this script, you assume all responsibility and liability for any results.
+
+## Streamlit-based GUI
+
+This project also includes a browser-based graphical user interface (GUI) built with Streamlit. Follow the steps below to set up and use it.
+
+### Installation and Setup
+
+1. **Clone the repository:**
+
+   ```bash
+   git clone https://github.com/PallavAg/claude-computer-use-macos.git
+   cd claude-computer-use-macos
+   ```
+
+2. **Create a virtual environment + install dependencies:**
+
+   ```bash
+   python3.12 -m venv venv
+   source venv/bin/activate
+   pip3.12 install -r requirements.txt
+   ```
+
+3. **Set your Anthropic API key as an environment variable:**
+
+   ```bash
+   export ANTHROPIC_API_KEY="CLAUDE_API_KEY"
+   ```
+
+   Replace `CLAUDE_API_KEY` with your actual Anthropic API key. You can find yours [here](https://console.anthropic.com/settings/keys).
+
+4. **Grant Accessibility Permissions:**
+
+   The script uses `pyautogui` to control mouse and keyboard events. On macOS, you need to grant accessibility permissions. The permission prompts should appear automatically the first time you run the script, so you can usually skip this step. To grant permissions manually:
+
+   - Go to **System Preferences** > **Security & Privacy** > **Privacy** tab.
+   - Select **Accessibility** from the list on the left.
+   - Add your terminal application or Python interpreter to the list of allowed apps.
+
+### Usage
+
+You can run the Streamlit app by executing the following command:
+
+```bash
+streamlit run streamlit_app.py
+```
+
+This will launch the Streamlit app in your default web browser. You can then interact with the assistant through the web-based GUI.
+
+### Screenshots
+
+Here are some screenshots of the new GUI:
+
+![Screenshot 1](screenshots/screenshot1.png)
+![Screenshot 2](screenshots/screenshot2.png)
+
+### Exiting the App
+
+You can quit the app at any time by closing the browser tab or pressing `Ctrl+C` in the terminal.
+
+## ⚠ Disclaimer
+
+> [!CAUTION]
+> - **Security Risks:** This script allows Claude to control your computer's mouse and keyboard and run bash commands. Use it at your own risk.
+> - **Responsibility:** By running this script, you assume all responsibility and liability for any results.
diff --git a/main.py b/main.py
@@ -3,6 +3,7 @@
 import sys
 import json
 import base64
+import streamlit as st
 
 from computer_use_demo.loop import sampling_loop, APIProvider
 from computer_use_demo.tools import ToolResult
@@ -40,23 +41,23 @@ async def main():
     # Define callbacks (you can customize these)
     def output_callback(content_block):
         if isinstance(content_block, dict) and content_block.get("type") == "text":
-            print("Assistant:", content_block.get("text"))
+            st.write("Assistant:", content_block.get("text"))
 
     def tool_output_callback(result: ToolResult, tool_use_id: str):
         if result.output:
-            print(f"> Tool Output [{tool_use_id}]:", result.output)
+            st.write(f"> Tool Output [{tool_use_id}]:", result.output)
         if result.error:
-            print(f"!!! Tool Error [{tool_use_id}]:", result.error)
+            st.write(f"!!! Tool Error [{tool_use_id}]:", result.error)
         if result.base64_image:
             # Save the image to a file if needed
             os.makedirs("screenshots", exist_ok=True)
             image_data = result.base64_image
             with open(f"screenshots/screenshot_{tool_use_id}.png", "wb") as f:
                 f.write(base64.b64decode(image_data))
-            print(f"Took screenshot screenshot_{tool_use_id}.png")
+            st.image(f"screenshots/screenshot_{tool_use_id}.png", caption=f"Screenshot {tool_use_id}")
 
     def api_response_callback(response: APIResponse[BetaMessage]):
-        print(
+        st.write(
             "\n---------------\nAPI Response:\n",
             json.dumps(json.loads(response.text)["content"], indent=4),  # type: ignore
             "\n",
@@ -81,4 +82,4 @@ def api_response_callback(response: APIResponse[BetaMessage]):
     try:
         asyncio.run(main())
     except Exception as e:
-        print(f"Encountered Error:\n{e}")
+        st.write(f"Encountered Error:\n{e}")
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 anthropic[bedrock,vertex]>=0.37.1
 pillow
 PyAutoGUI
+streamlit
diff --git a/streamlit_app.py b/streamlit_app.py
@@ -0,0 +1,78 @@
+import streamlit as st
+import asyncio
+import os
+import json
+import base64
+
+from computer_use_demo.loop import sampling_loop, APIProvider
+from computer_use_demo.tools import ToolResult
+from anthropic.types.beta import BetaMessage, BetaMessageParam
+from anthropic import APIResponse
+
+# Define the Streamlit app layout and functionality
+def main():
+    st.title("Claude Computer Use Demo")
+    st.write("A modern, high-end, professional interface for interacting with Claude.")
+
+    # Read the Anthropic API key from the environment; the placeholder default marks it as unset
+    api_key = os.getenv("ANTHROPIC_API_KEY", "YOUR_API_KEY_HERE")
+    if api_key == "YOUR_API_KEY_HERE":
+        st.error("Please set your API key in the ANTHROPIC_API_KEY environment variable.")
+        return
+    provider = APIProvider.ANTHROPIC
+
+    # Free-text instruction for the assistant, pre-filled with an example task
+    instruction = st.text_input("Enter your instruction:", "Save an image of a cat to the desktop.")
+
+    if st.button("Run"):
+        st.write(f"Starting Claude 'Computer Use' with instruction: '{instruction}'")
+
+        # Seed the conversation with the instruction as a single user message
+        messages: list[BetaMessageParam] = [
+            {
+                "role": "user",
+                "content": instruction,
+            }
+        ]
+
+        # Callbacks passed to sampling_loop; each one renders its payload into the page
+        def output_callback(content_block):
+            if isinstance(content_block, dict) and content_block.get("type") == "text":
+                st.write("Assistant:", content_block.get("text"))
+
+        def tool_output_callback(result: ToolResult, tool_use_id: str):
+            if result.output:
+                st.write(f"> Tool Output [{tool_use_id}]:", result.output)
+            if result.error:
+                st.write(f"!!! Tool Error [{tool_use_id}]:", result.error)
+            if result.base64_image:
+                # Write the decoded screenshot to disk, then display it from that path
+                os.makedirs("screenshots", exist_ok=True)
+                image_data = result.base64_image
+                with open(f"screenshots/screenshot_{tool_use_id}.png", "wb") as f:
+                    f.write(base64.b64decode(image_data))
+                st.image(f"screenshots/screenshot_{tool_use_id}.png", caption=f"Screenshot {tool_use_id}")
+
+        def api_response_callback(response: APIResponse[BetaMessage]):
+            st.write(
+                "\n---------------\nAPI Response:\n",
+                json.dumps(json.loads(response.text)["content"], indent=4),  # type: ignore
+                "\n",
+            )
+
+        # Run the async sampling loop to completion on a fresh event loop
+        asyncio.run(sampling_loop(
+            model="claude-3-5-sonnet-20241022",
+            provider=provider,
+            system_prompt_suffix="",
+            messages=messages,
+            output_callback=output_callback,
+            tool_output_callback=tool_output_callback,
+            api_response_callback=api_response_callback,
+            api_key=api_key,
+            only_n_most_recent_images=10,
+            max_tokens=4096,
+        ))
+
+if __name__ == "__main__":
+    main()