Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,67 @@ You can quit the script at any time by pressing `Ctrl+C` in the terminal.
> [!CAUTION]
> - **Security Risks:** This script allows Claude to control your computer's mouse and keyboard and run bash commands. Use it at your own risk.
> - **Responsibility:** By running this script, you assume all responsibility and liability for any results.

## Streamlit-based GUI

A graphical user interface (GUI) built with Streamlit is now available. Follow the steps below to set it up and use it.

### Installation and Setup

1. **Clone the repository:**

```bash
git clone https://github.com/PallavAg/claude-computer-use-macos.git
cd claude-computer-use-macos
```

2. **Create a virtual environment + install dependencies:**

```bash
python3.12 -m venv venv
source venv/bin/activate
pip3.12 install -r requirements.txt
```

3. **Set your Anthropic API key as an environment variable:**

```bash
export ANTHROPIC_API_KEY="CLAUDE_API_KEY"
```

Replace `CLAUDE_API_KEY` with your actual Anthropic API key. You can find yours [here](https://console.anthropic.com/settings/keys).

4. **Grant Accessibility Permissions:**

The script uses `pyautogui` to control mouse and keyboard events. On macOS, you need to grant accessibility permissions. The permission prompts should appear automatically the first time you run the script, in which case you can skip this step. To grant permissions manually:

- Go to **System Preferences** > **Security & Privacy** > **Privacy** tab.
- Select **Accessibility** from the list on the left.
- Add your terminal application or Python interpreter to the list of allowed apps.

### Usage

You can run the Streamlit app by executing the following command:

```bash
streamlit run streamlit_app.py
```

This will launch the Streamlit app in your default web browser. You can then interact with the assistant through the web-based GUI.

### Screenshots

Here are some screenshots of the new GUI:

![Screenshot 1](screenshots/screenshot1.png)
![Screenshot 2](screenshots/screenshot2.png)

### Exiting the App

You can quit the app at any time by closing the browser tab or pressing `Ctrl+C` in the terminal.

## ⚠ Disclaimer

> [!CAUTION]
> - **Security Risks:** This script allows Claude to control your computer's mouse and keyboard and run bash commands. Use it at your own risk.
> - **Responsibility:** By running this script, you assume all responsibility and liability for any results.
13 changes: 7 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import sys
import json
import base64
import streamlit as st

from computer_use_demo.loop import sampling_loop, APIProvider
from computer_use_demo.tools import ToolResult
Expand Down Expand Up @@ -40,23 +41,23 @@ async def main():
# Define callbacks (you can customize these)
def output_callback(content_block):
if isinstance(content_block, dict) and content_block.get("type") == "text":
print("Assistant:", content_block.get("text"))
st.write("Assistant:", content_block.get("text"))

def tool_output_callback(result: ToolResult, tool_use_id: str):
if result.output:
print(f"> Tool Output [{tool_use_id}]:", result.output)
st.write(f"> Tool Output [{tool_use_id}]:", result.output)
if result.error:
print(f"!!! Tool Error [{tool_use_id}]:", result.error)
st.write(f"!!! Tool Error [{tool_use_id}]:", result.error)
if result.base64_image:
# Save the image to a file if needed
os.makedirs("screenshots", exist_ok=True)
image_data = result.base64_image
with open(f"screenshots/screenshot_{tool_use_id}.png", "wb") as f:
f.write(base64.b64decode(image_data))
print(f"Took screenshot screenshot_{tool_use_id}.png")
st.image(f"screenshots/screenshot_{tool_use_id}.png", caption=f"Screenshot {tool_use_id}")

def api_response_callback(response: APIResponse[BetaMessage]):
print(
st.write(
"\n---------------\nAPI Response:\n",
json.dumps(json.loads(response.text)["content"], indent=4), # type: ignore
"\n",
Expand All @@ -81,4 +82,4 @@ def api_response_callback(response: APIResponse[BetaMessage]):
try:
asyncio.run(main())
except Exception as e:
print(f"Encountered Error:\n{e}")
st.write(f"Encountered Error:\n{e}")
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
anthropic[bedrock,vertex]>=0.37.1
pillow
PyAutoGUI
streamlit
78 changes: 78 additions & 0 deletions streamlit_app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
import streamlit as st
import asyncio
import os
import json
import base64

from computer_use_demo.loop import sampling_loop, APIProvider
from computer_use_demo.tools import ToolResult
from anthropic.types.beta import BetaMessage, BetaMessageParam
from anthropic import APIResponse

# Define the Streamlit app layout and functionality
def main():
    """Render the Streamlit page and run one Claude "Computer Use" session.

    The API key is read from the ``ANTHROPIC_API_KEY`` environment variable.
    The page shows a text box for the instruction; when "Run" is pressed,
    ``sampling_loop`` is driven to completion and the assistant text, tool
    output, tool errors, screenshots and API response content are written to
    the page through the callbacks defined below.

    Returns:
        None. Everything is reported through Streamlit widgets; screenshots
        are additionally saved under the ``screenshots/`` directory.
    """
    st.title("Claude Computer Use Demo")
    st.write("A modern, high-end, professional interface for interacting with Claude.")

    # Set up your Anthropic API key and model.
    # An unset, empty or whitespace-only variable is treated as missing, as is
    # the placeholder value, so an unusable key never reaches the API.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if not api_key or api_key == "YOUR_API_KEY_HERE":
        st.error("Please set your API key in the ANTHROPIC_API_KEY environment variable.")
        return
    provider = APIProvider.ANTHROPIC

    # User input section
    instruction = st.text_input("Enter your instruction:", "Save an image of a cat to the desktop.")

    # Nothing else to render until the user presses the button.
    if not st.button("Run"):
        return

    # Reject a blank instruction here instead of sending an empty user
    # message to the API.
    if not instruction.strip():
        st.error("Please enter an instruction before pressing Run.")
        return

    st.write(f"Starting Claude 'Computer Use' with instruction: '{instruction}'")

    # Set up the initial messages
    messages: list[BetaMessageParam] = [
        {
            "role": "user",
            "content": instruction,
        }
    ]

    # Define callbacks
    def output_callback(content_block):
        """Show a text content block produced by the assistant."""
        if isinstance(content_block, dict) and content_block.get("type") == "text":
            st.write("Assistant:", content_block.get("text"))

    def tool_output_callback(result: ToolResult, tool_use_id: str):
        """Show a tool's output, error and screenshot, whichever are present."""
        if result.output:
            st.write(f"> Tool Output [{tool_use_id}]:", result.output)
        if result.error:
            st.write(f"!!! Tool Error [{tool_use_id}]:", result.error)
        if result.base64_image:
            # Save the image to a file, then display that file on the page.
            os.makedirs("screenshots", exist_ok=True)
            screenshot_path = f"screenshots/screenshot_{tool_use_id}.png"
            with open(screenshot_path, "wb") as f:
                f.write(base64.b64decode(result.base64_image))
            st.image(screenshot_path, caption=f"Screenshot {tool_use_id}")

    def api_response_callback(response: APIResponse[BetaMessage]):
        """Show the "content" field of the raw API response as formatted JSON."""
        content = json.loads(response.text)["content"]  # type: ignore
        st.write("\n---------------\nAPI Response:\n")
        # st.write renders strings as Markdown, which collapses the JSON
        # indentation; a code block keeps the pretty-printed layout.
        st.code(json.dumps(content, indent=4), language="json")

    # Run the sampling loop
    asyncio.run(
        sampling_loop(
            model="claude-3-5-sonnet-20241022",
            provider=provider,
            system_prompt_suffix="",
            messages=messages,
            output_callback=output_callback,
            tool_output_callback=tool_output_callback,
            api_response_callback=api_response_callback,
            api_key=api_key,
            only_n_most_recent_images=10,
            max_tokens=4096,
        )
    )

# Call main() only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()