Skip to content

Commit e6da37c

Browse files
authored
Merge pull request #1 from yana1205/toolkit
feat: provide bench runner and agent harness as a toolkit (ported from agent-bench-automation)
2 parents 3befa5e + 011a019 commit e6da37c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

53 files changed

+3958
-2
lines changed

.dockerignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
**/__pycache__
2+
**/*.egg-info/
3+
**/.venv
4+
**/*Dockerfile

.gitignore

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
*.py,cover
51+
.hypothesis/
52+
.pytest_cache/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
target/
76+
77+
# Jupyter Notebook
78+
.ipynb_checkpoints
79+
80+
# IPython
81+
profile_default/
82+
ipython_config.py
83+
84+
# pyenv
85+
.python-version
86+
87+
# pipenv
88+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
90+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
91+
# install all needed dependencies.
92+
#Pipfile.lock
93+
94+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
95+
__pypackages__/
96+
97+
# Celery stuff
98+
celerybeat-schedule
99+
celerybeat.pid
100+
101+
# SageMath parsed files
102+
*.sage.py
103+
104+
# Environments
105+
.env
106+
.venv
107+
env/
108+
venv/
109+
ENV/
110+
env.bak/
111+
venv.bak/
112+
113+
# Spyder project settings
114+
.spyderproject
115+
.spyproject
116+
117+
# Rope project settings
118+
.ropeproject
119+
120+
# mkdocs documentation
121+
/site
122+
123+
# mypy
124+
.mypy_cache/
125+
.dmypy.json
126+
dmypy.json
127+
128+
# Pyre type checker
129+
.pyre/
130+
.vscode
131+
132+
.DS_Store

.pre-commit-config.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# This is an example configuration to enable detect-secrets in the pre-commit hook.
2+
# Add this file to the root folder of your repository.
3+
#
4+
# Read pre-commit hook framework https://pre-commit.com/ for more details about the structure of config yaml file and how git pre-commit would invoke each hook.
5+
#
6+
# This line indicates we will use the hook from ibm/detect-secrets to run scan during committing phase.
7+
repos:
8+
- repo: https://github.com/ibm/detect-secrets
9+
# To use a different version of detect-secrets, replace the `rev` value below with another git revision such as a branch, tag, or commit SHA.
10+
# Prefer static refs such as tags or commit SHAs over a branch name.
11+
#
12+
# Running "pre-commit autoupdate" automatically updates rev to latest tag
13+
rev: 0.13.1+ibm.62.dss
14+
hooks:
15+
- id: detect-secrets # pragma: whitelist secret
16+
# Add options for detect-secrets-hook binary. You can run `detect-secrets-hook --help` to list out all possible options.
17+
# You may also run `pre-commit run detect-secrets` to preview the scan result.
18+
# when "--baseline" without "--use-all-plugins", pre-commit scan with just plugins in baseline file
19+
# when "--baseline" with "--use-all-plugins", pre-commit scan with all available plugins
20+
# add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets
21+
args: [--baseline, .secrets.baseline, --use-all-plugins]

.secrets.baseline

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
{
2+
"exclude": {
3+
"files": ".git|.venv|.pytest*|caa-agent|caa-bundle|^.secrets.baseline$",
4+
"lines": null
5+
},
6+
"generated_at": "2025-03-21T08:44:41Z",
7+
"plugins_used": [
8+
{
9+
"name": "AWSKeyDetector"
10+
},
11+
{
12+
"name": "ArtifactoryDetector"
13+
},
14+
{
15+
"name": "AzureStorageKeyDetector"
16+
},
17+
{
18+
"base64_limit": 4.5,
19+
"name": "Base64HighEntropyString"
20+
},
21+
{
22+
"name": "BasicAuthDetector"
23+
},
24+
{
25+
"name": "BoxDetector"
26+
},
27+
{
28+
"name": "CloudantDetector"
29+
},
30+
{
31+
"ghe_instance": "github.ibm.com",
32+
"name": "GheDetector"
33+
},
34+
{
35+
"name": "GitHubTokenDetector"
36+
},
37+
{
38+
"hex_limit": 3,
39+
"name": "HexHighEntropyString"
40+
},
41+
{
42+
"name": "IbmCloudIamDetector"
43+
},
44+
{
45+
"name": "IbmCosHmacDetector"
46+
},
47+
{
48+
"name": "JwtTokenDetector"
49+
},
50+
{
51+
"keyword_exclude": null,
52+
"name": "KeywordDetector"
53+
},
54+
{
55+
"name": "MailchimpDetector"
56+
},
57+
{
58+
"name": "NpmDetector"
59+
},
60+
{
61+
"name": "PrivateKeyDetector"
62+
},
63+
{
64+
"name": "SlackDetector"
65+
},
66+
{
67+
"name": "SoftlayerDetector"
68+
},
69+
{
70+
"name": "SquareOAuthDetector"
71+
},
72+
{
73+
"name": "StripeDetector"
74+
},
75+
{
76+
"name": "TwilioKeyDetector"
77+
}
78+
],
79+
"results": {},
80+
"version": "0.13.1+ibm.62.dss",
81+
"word_list": {
82+
"file": null,
83+
"hash": null
84+
}
85+
}

.whitesource

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"settingsInheritedFrom": "whitesource-config/whitesource-config@master"
3+
}

Makefile

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
2+
# Git ref of ibm/detect-secrets installed by `make install-detect-secrets`.
# Defaults to `master`; override on the command line, e.g.
#   make install-detect-secrets DETECT_SECRETS_REF=<tag-or-sha>
DETECT_SECRETS_REF ?= master

# Run the unit tests under ./tests with pytest.
.PHONY: test
test:
	@echo "Running Unit Tests"
	@pytest tests

# Reformat the whole repository with black.
.PHONY: format
format:
	python -m pip --version >/dev/null && python -m black .

# PyPI packaging does not allow direct (VCS/URL) dependencies, even when the
# dependency is only declared as an extra.
# Workaround: install detect-secrets manually through this target.
.PHONY: install-detect-secrets install-detect-descret
install-detect-secrets:
	python -m pip install "detect-secrets@git+https://github.com/ibm/detect-secrets.git@$(DETECT_SECRETS_REF)#egg=detect-secrets"

# Misspelled original target name, kept as an alias so existing invocations
# keep working.
install-detect-descret: install-detect-secrets

# Remove packaging artifacts, then let pyclean delete Python bytecode caches.
.PHONY: clean
clean:
	@rm -rf build *.egg-info dist
	python -m pyclean -v .

README.md

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,32 @@
1-
# ITBench-Tools
2-
Code repository for tools as part of ITBench
1+
# ITBench Tools
2+
3+
This repository provides a toolkit for [ITBench](https://github.com/IBM/itbench), including the containerized components used to run and evaluate agents.
4+
5+
## 🎞️ Components
6+
7+
- **bench-runner**: Executes benchmark scenarios.
8+
- **agent-harness**: Wraps agents so they can interact with the ITBench service.
9+
10+
## 🛠️ Build and Push (Multi-Arch)
11+
12+
```bash
13+
bench_runner_name="icr.io/agent-bench/bench-runner-base:0.0.1"
14+
agent_harness_name="icr.io/agent-bench/agent-harness-base:0.0.1"
15+
16+
# Build and push bench-runner base image
17+
docker buildx build --platform linux/amd64,linux/arm64 \
18+
-f ./docker/bench-runner/Dockerfile \
19+
-t ${bench_runner_name} \
20+
. --push
21+
22+
# Build and push agent-harness base image
23+
docker buildx build --platform linux/amd64,linux/arm64 \
24+
-f ./docker/agent-harness/Dockerfile \
25+
-t ${agent_harness_name} \
26+
. --push
27+
```
28+
29+
## 📝 Notes
30+
31+
- Make sure `docker buildx` is installed and configured with a builder that supports multi-platform builds.
32+
- You need to be logged in to the container registry (`icr.io`) before pushing.

docker/agent-harness/Dockerfile

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Base image for the agent harness: Python 3.11 with the itbench_tools package
# installed in editable mode under /etc/agent-benchmark.
FROM python:3.11.10-slim

# OS packages. `update` and `install` share one layer so a stale cached package
# index is never reused, and the index is deleted in that same layer so it does
# not end up in the image. Images built on top of this one must run
# `apt-get update` themselves before installing further packages.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        curl \
        gnupg2 \
    && rm -rf /var/lib/apt/lists/*

# Point /bin/sh at bash.
RUN ln -sf /bin/bash /bin/sh

# Pinned setuptools, installed before the sources are copied so this layer
# stays cached when only the package code changes.
RUN pip install --no-cache-dir --upgrade setuptools==70.0.0

# COPY creates the destination directories and --chmod sets the executable bit,
# so no separate `RUN mkdir` / `RUN chmod` layers are needed.
COPY --chmod=0755 docker/agent-harness/entrypoint.sh /etc/entrypoint.sh
COPY pyproject.toml /etc/agent-benchmark/pyproject.toml
COPY itbench_tools /etc/agent-benchmark/itbench_tools

# Editable install: the package is imported straight from /etc/agent-benchmark.
RUN pip install --no-cache-dir -e /etc/agent-benchmark

WORKDIR /etc/agent-benchmark

# NOTE(review): no USER is set, so the container runs as root. Confirm whether
# the harness and images derived from this one can run as a non-root user.
ENTRYPOINT ["/etc/entrypoint.sh"]

docker/agent-harness/entrypoint.sh

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
#!/bin/bash
#
# Entrypoint for the agent-harness container.
#
# Parses the connection options, then replaces this shell with the agent
# harness (itbench_tools/agent_harness/main.py), started with --ssl and
# --single_run.
#
# Options:
#   --host <host>                Forwarded as --host (required).
#   --port <port>                Forwarded as --port (default: 443).
#   --root_path <path>           Forwarded as --root_path (default: /bench-server).
#   --benchmark_timeout <value>  Forwarded as --benchmark_timeout (default: 300).
#
# Exits with status 1 on an unknown option, a missing option value, or a
# missing host; otherwise the exit status is that of the harness.

cd /etc/agent-benchmark || exit 1

# Keep a value inherited from the environment, if any; --host overrides it.
host="${host:-}"
port="443"
root_path="/bench-server"
benchmark_timeout="300"

while [[ $# -gt 0 ]]; do
  case "$1" in
    --host | --port | --root_path | --benchmark_timeout)
      # `shift 2` fails without shifting when only one argument is left, which
      # would loop forever, so reject an option that has no value.
      if [[ $# -lt 2 ]]; then
        echo "Missing value for option: $1" >&2
        exit 1
      fi
      case "$1" in
        --host) host="$2" ;;
        --port) port="$2" ;;
        --root_path) root_path="$2" ;;
        --benchmark_timeout) benchmark_timeout="$2" ;;
      esac
      shift 2
      ;;
    *)
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# Fail early with a clear message instead of handing the harness an empty host.
if [[ -z "$host" ]]; then
  echo "Missing required option: --host" >&2
  exit 1
fi

# exec makes the harness replace this shell, so it receives container signals
# directly and its exit status becomes the container's exit status.
exec python itbench_tools/agent_harness/main.py \
  --agent_directory /etc/ciso-agent \
  -i /tmp/agent-manifest.json \
  -c /etc/ciso-agent/agent-harness.yaml \
  --host "$host" \
  --port "$port" \
  --root_path "$root_path" \
  --ssl \
  --benchmark_timeout "$benchmark_timeout" \
  --single_run
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Base image for the bench runner: Python 3.12 with the itbench_tools package
# installed in editable mode under /app/agent-benchmark.
FROM python:3.12-slim

# OS packages. `update` and `install` share one layer so a stale cached package
# index is never reused, and the index is deleted in that same layer so it does
# not end up in the image. Images built on top of this one must run
# `apt-get update` themselves before installing further packages.
RUN apt-get update -y \
    && apt-get install -y --no-install-recommends \
        curl \
        gnupg2 \
        jq \
    && rm -rf /var/lib/apt/lists/*

# Point /bin/sh at bash.
RUN ln -sf /bin/bash /bin/sh

# WORKDIR creates /app/agent-benchmark (and /app), so no `RUN mkdir` is needed.
WORKDIR /app/agent-benchmark

COPY pyproject.toml /app/agent-benchmark/pyproject.toml
COPY itbench_tools /app/agent-benchmark/itbench_tools

# Editable install: the package is imported straight from /app/agent-benchmark.
# NOTE(review): the agent-harness image pins setuptools==70.0.0 before this
# step and this image does not. Confirm whether the pin is needed here too.
RUN pip install --no-cache-dir -e /app/agent-benchmark

0 commit comments

Comments
 (0)