Skip to content

Commit 3c3d3d1

Browse files
authored
fix: add integration tests for codex-exec-mcp-server with execpolicy (#7617)
This PR introduces integration tests that run [codex-shell-tool-mcp](https://www.npmjs.com/package/@openai/codex-shell-tool-mcp) as a user would. Note that this requires running our fork of Bash, so we introduce a [DotSlash](https://dotslash-cli.com/) file for `bash` so that we can run the integration tests on multiple platforms without having to check the binaries into the repository. (As noted in the DotSlash file, it is slightly more heavyweight than necessary, which may be worth addressing as disk space in CI is limited: #7678.)

To start, this PR adds two tests:

- `list_tools()` makes the `list_tools` request to the MCP server and verifies we get the expected response
- `accept_elicitation_for_prompt_rule()` defines a `prefix_rule()` with `decision="prompt"` and verifies the elicitation flow works as expected

Note, however, that the `accept_elicitation_for_prompt_rule()` test currently **only works on macOS**, as this PR reveals that there are issues when running the Bash fork in a read-only sandbox on Linux. This will have to be fixed in a follow-up PR.

Incidentally, getting this test to run correctly on macOS also requires a recent fix we made to `brew` that hasn't hit a mainline release yet, so getting CI green in this PR required #7680.
1 parent 3c087e8 commit 3c3d3d1

File tree

14 files changed

+516
-1
lines changed

14 files changed

+516
-1
lines changed

.github/workflows/rust-ci.yml

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,19 @@ jobs:
407407
brew upgrade
408408
brew --version
409409
410+
# Some integration tests rely on DotSlash being installed.
411+
# See https://github.com/openai/codex/pull/7617.
412+
- name: Install DotSlash
413+
uses: facebook/install-dotslash@v2
414+
415+
- name: Pre-fetch DotSlash artifacts
416+
# The Bash wrapper is not available on Windows.
417+
if: ${{ !startsWith(matrix.runner, 'windows') }}
418+
shell: bash
419+
run: |
420+
set -euo pipefail
421+
dotslash -- fetch exec-server/tests/suite/bash
422+
410423
- uses: dtolnay/rust-toolchain@1.90
411424
with:
412425
targets: ${{ matrix.target }}

codex-rs/Cargo.lock

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codex-rs/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ codex-utils-readiness = { path = "utils/readiness" }
9696
codex-utils-string = { path = "utils/string" }
9797
codex-windows-sandbox = { path = "windows-sandbox-rs" }
9898
core_test_support = { path = "core/tests/common" }
99+
exec_server_test_support = { path = "exec-server/tests/common" }
99100
mcp-types = { path = "mcp-types" }
100101
mcp_test_support = { path = "mcp-server/tests/common" }
101102

codex-rs/exec-server/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,5 +56,9 @@ tracing = { workspace = true }
5656
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
5757

5858
[dev-dependencies]
59+
assert_cmd = { workspace = true }
60+
exec_server_test_support = { workspace = true }
61+
maplit = { workspace = true }
5962
pretty_assertions = { workspace = true }
6063
tempfile = { workspace = true }
64+
which = { workspace = true }

codex-rs/exec-server/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ pub use posix::main_execve_wrapper;
66

77
#[cfg(unix)]
88
pub use posix::main_mcp_server;
9+
10+
#[cfg(unix)]
11+
pub use posix::ExecResult;

codex-rs/exec-server/src/posix.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ mod mcp_escalation_policy;
8282
mod socket;
8383
mod stopwatch;
8484

85+
pub use mcp::ExecResult;
86+
8587
/// Default value of --execve option relative to the current executable.
8688
/// Note this must match the name of the binary as specified in Cargo.toml.
8789
const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper";

codex-rs/exec-server/src/posix/mcp.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pub struct ExecParams {
5454
pub login: Option<bool>,
5555
}
5656

57-
#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
57+
#[derive(Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
5858
pub struct ExecResult {
5959
pub exit_code: i32,
6060
pub output: String,

codex-rs/exec-server/tests/all.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Single integration test binary that aggregates all test modules.
2+
// The submodules live in `tests/suite/`.
3+
mod suite;
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[package]
2+
name = "exec_server_test_support"
3+
version.workspace = true
4+
edition.workspace = true
5+
license.workspace = true
6+
7+
[lib]
8+
path = "lib.rs"
9+
10+
[dependencies]
11+
assert_cmd = { workspace = true }
12+
anyhow = { workspace = true }
13+
codex-core = { workspace = true }
14+
rmcp = { workspace = true }
15+
serde_json = { workspace = true }
16+
tokio = { workspace = true }
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
2+
use codex_core::SandboxState;
3+
use codex_core::protocol::SandboxPolicy;
4+
use rmcp::ClientHandler;
5+
use rmcp::ErrorData as McpError;
6+
use rmcp::RoleClient;
7+
use rmcp::Service;
8+
use rmcp::model::ClientCapabilities;
9+
use rmcp::model::ClientInfo;
10+
use rmcp::model::CreateElicitationRequestParam;
11+
use rmcp::model::CreateElicitationResult;
12+
use rmcp::model::CustomClientNotification;
13+
use rmcp::model::ElicitationAction;
14+
use rmcp::service::RunningService;
15+
use rmcp::transport::ConfigureCommandExt;
16+
use rmcp::transport::TokioChildProcess;
17+
use serde_json::json;
18+
use std::collections::HashSet;
19+
use std::path::Path;
20+
use std::path::PathBuf;
21+
use std::process::Stdio;
22+
use std::sync::Arc;
23+
use std::sync::Mutex;
24+
use tokio::process::Command;
25+
26+
pub fn create_transport<P>(codex_home: P) -> anyhow::Result<TokioChildProcess>
27+
where
28+
P: AsRef<Path>,
29+
{
30+
let mcp_executable = assert_cmd::Command::cargo_bin("codex-exec-mcp-server")?;
31+
let execve_wrapper = assert_cmd::Command::cargo_bin("codex-execve-wrapper")?;
32+
let bash = Path::new(env!("CARGO_MANIFEST_DIR"))
33+
.join("..")
34+
.join("..")
35+
.join("tests")
36+
.join("suite")
37+
.join("bash");
38+
39+
let transport =
40+
TokioChildProcess::new(Command::new(mcp_executable.get_program()).configure(|cmd| {
41+
cmd.arg("--bash").arg(bash);
42+
cmd.arg("--execve").arg(execve_wrapper.get_program());
43+
cmd.env("CODEX_HOME", codex_home.as_ref());
44+
45+
// Important: pipe stdio so rmcp can speak JSON-RPC over stdin/stdout
46+
cmd.stdin(Stdio::piped());
47+
cmd.stdout(Stdio::piped());
48+
49+
// Optional but very helpful while debugging:
50+
cmd.stderr(Stdio::inherit());
51+
}))?;
52+
53+
Ok(transport)
54+
}
55+
56+
pub async fn write_default_execpolicy<P>(policy: &str, codex_home: P) -> anyhow::Result<()>
57+
where
58+
P: AsRef<Path>,
59+
{
60+
let policy_dir = codex_home.as_ref().join("policy");
61+
tokio::fs::create_dir_all(&policy_dir).await?;
62+
tokio::fs::write(policy_dir.join("default.codexpolicy"), policy).await?;
63+
Ok(())
64+
}
65+
66+
pub async fn notify_readable_sandbox<P, S>(
67+
sandbox_cwd: P,
68+
codex_linux_sandbox_exe: Option<PathBuf>,
69+
service: &RunningService<RoleClient, S>,
70+
) -> anyhow::Result<()>
71+
where
72+
P: AsRef<Path>,
73+
S: Service<RoleClient> + ClientHandler,
74+
{
75+
let sandbox_state = SandboxState {
76+
sandbox_policy: SandboxPolicy::ReadOnly,
77+
codex_linux_sandbox_exe,
78+
sandbox_cwd: sandbox_cwd.as_ref().to_path_buf(),
79+
};
80+
send_sandbox_notification(sandbox_state, service).await
81+
}
82+
83+
pub async fn notify_writable_sandbox_only_one_folder<P, S>(
84+
writable_folder: P,
85+
codex_linux_sandbox_exe: Option<PathBuf>,
86+
service: &RunningService<RoleClient, S>,
87+
) -> anyhow::Result<()>
88+
where
89+
P: AsRef<Path>,
90+
S: Service<RoleClient> + ClientHandler,
91+
{
92+
let sandbox_state = SandboxState {
93+
sandbox_policy: SandboxPolicy::WorkspaceWrite {
94+
// Note that sandbox_cwd will already be included as a writable root
95+
// when the sandbox policy is expanded.
96+
writable_roots: vec![],
97+
network_access: false,
98+
// Disable writes to temp dir because this is a test, so
99+
// writable_folder is likely also under /tmp and we want to be
100+
// strict about what is writable.
101+
exclude_tmpdir_env_var: true,
102+
exclude_slash_tmp: true,
103+
},
104+
codex_linux_sandbox_exe,
105+
sandbox_cwd: writable_folder.as_ref().to_path_buf(),
106+
};
107+
send_sandbox_notification(sandbox_state, service).await
108+
}
109+
110+
async fn send_sandbox_notification<S>(
111+
sandbox_state: SandboxState,
112+
service: &RunningService<RoleClient, S>,
113+
) -> anyhow::Result<()>
114+
where
115+
S: Service<RoleClient> + ClientHandler,
116+
{
117+
let sandbox_state_notification = CustomClientNotification::new(
118+
MCP_SANDBOX_STATE_NOTIFICATION,
119+
Some(serde_json::to_value(sandbox_state)?),
120+
);
121+
service
122+
.send_notification(sandbox_state_notification.into())
123+
.await?;
124+
Ok(())
125+
}
126+
127+
pub struct InteractiveClient {
128+
pub elicitations_to_accept: HashSet<String>,
129+
pub elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParam>>>,
130+
}
131+
132+
impl ClientHandler for InteractiveClient {
133+
fn get_info(&self) -> ClientInfo {
134+
let capabilities = ClientCapabilities::builder().enable_elicitation().build();
135+
ClientInfo {
136+
capabilities,
137+
..Default::default()
138+
}
139+
}
140+
141+
fn create_elicitation(
142+
&self,
143+
request: CreateElicitationRequestParam,
144+
_context: rmcp::service::RequestContext<RoleClient>,
145+
) -> impl std::future::Future<Output = Result<CreateElicitationResult, McpError>> + Send + '_
146+
{
147+
self.elicitation_requests
148+
.lock()
149+
.unwrap()
150+
.push(request.clone());
151+
152+
let accept = self.elicitations_to_accept.contains(&request.message);
153+
async move {
154+
if accept {
155+
Ok(CreateElicitationResult {
156+
action: ElicitationAction::Accept,
157+
content: Some(json!({ "approve": true })),
158+
})
159+
} else {
160+
Ok(CreateElicitationResult {
161+
action: ElicitationAction::Decline,
162+
content: None,
163+
})
164+
}
165+
}
166+
}
167+
}

0 commit comments

Comments
 (0)