Skip to content

Commit ce279a3

Browse files
committed
fix: add test that verifies that codex-exec-mcp-server starts up
1 parent b1c918d commit ce279a3

File tree

13 files changed

+471
-4
lines changed

13 files changed

+471
-4
lines changed

codex-rs/Cargo.lock

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

codex-rs/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ codex-utils-readiness = { path = "utils/readiness" }
9696
codex-utils-string = { path = "utils/string" }
9797
codex-windows-sandbox = { path = "windows-sandbox-rs" }
9898
core_test_support = { path = "core/tests/common" }
99+
exec_server_test_support = { path = "exec-server/tests/common" }
99100
mcp-types = { path = "mcp-types" }
100101
mcp_test_support = { path = "mcp-server/tests/common" }
101102

@@ -178,8 +179,8 @@ seccompiler = "0.5.0"
178179
sentry = "0.34.0"
179180
serde = "1"
180181
serde_json = "1"
181-
serde_yaml = "0.9"
182182
serde_with = "3.16"
183+
serde_yaml = "0.9"
183184
serial_test = "3.2.0"
184185
sha1 = "0.10.6"
185186
sha2 = "0.10"

codex-rs/exec-server/Cargo.toml

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
[package]
2-
name = "codex-exec-server"
3-
version.workspace = true
42
edition.workspace = true
53
license.workspace = true
4+
name = "codex-exec-server"
5+
version.workspace = true
66

77
[[bin]]
88
name = "codex-execve-wrapper"
@@ -56,5 +56,9 @@ tracing = { workspace = true }
5656
tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
5757

5858
[dev-dependencies]
59+
assert_cmd = { workspace = true }
60+
exec_server_test_support = { workspace = true }
61+
maplit = { workspace = true }
5962
pretty_assertions = { workspace = true }
6063
tempfile = { workspace = true }
64+
which = { workspace = true }

codex-rs/exec-server/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,6 @@ pub use posix::main_execve_wrapper;
66

77
#[cfg(unix)]
88
pub use posix::main_mcp_server;
9+
10+
#[cfg(unix)]
11+
pub use posix::ExecResult;

codex-rs/exec-server/src/posix.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,8 @@ mod mcp_escalation_policy;
8282
mod socket;
8383
mod stopwatch;
8484

85+
pub use mcp::ExecResult;
86+
8587
/// Default value of --execve option relative to the current executable.
8688
/// Note this must match the name of the binary as specified in Cargo.toml.
8789
const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper";

codex-rs/exec-server/src/posix/mcp.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ pub struct ExecParams {
5454
pub login: Option<bool>,
5555
}
5656

57-
#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
57+
#[derive(Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
5858
pub struct ExecResult {
5959
pub exit_code: i32,
6060
pub output: String,

codex-rs/exec-server/tests/all.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
// Single integration test binary that aggregates all test modules.
2+
// The submodules live in `tests/suite/`.
3+
mod suite;
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[package]
2+
name = "exec_server_test_support"
3+
version.workspace = true
4+
edition.workspace = true
5+
license.workspace = true
6+
7+
[lib]
8+
path = "lib.rs"
9+
10+
[dependencies]
11+
assert_cmd = { workspace = true }
12+
anyhow = { workspace = true }
13+
codex-core = { workspace = true }
14+
rmcp = { workspace = true }
15+
serde_json = { workspace = true }
16+
tokio = { workspace = true }
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
2+
use codex_core::SandboxState;
3+
use codex_core::protocol::SandboxPolicy;
4+
use rmcp::ClientHandler;
5+
use rmcp::ErrorData as McpError;
6+
use rmcp::RoleClient;
7+
use rmcp::Service;
8+
use rmcp::model::ClientCapabilities;
9+
use rmcp::model::ClientInfo;
10+
use rmcp::model::CreateElicitationRequestParam;
11+
use rmcp::model::CreateElicitationResult;
12+
use rmcp::model::CustomClientNotification;
13+
use rmcp::model::ElicitationAction;
14+
use rmcp::service::RunningService;
15+
use rmcp::transport::ConfigureCommandExt;
16+
use rmcp::transport::TokioChildProcess;
17+
use serde_json::json;
18+
use std::collections::HashSet;
19+
use std::path::Path;
20+
use std::process::Stdio;
21+
use std::sync::Arc;
22+
use std::sync::Mutex;
23+
use tokio::process::Command;
24+
25+
pub fn create_transport<P>(codex_home: P) -> anyhow::Result<TokioChildProcess>
26+
where
27+
P: AsRef<Path>,
28+
{
29+
let mcp_executable = assert_cmd::Command::cargo_bin("codex-exec-mcp-server")?;
30+
let execve_wrapper = assert_cmd::Command::cargo_bin("codex-execve-wrapper")?;
31+
let bash = Path::new(env!("CARGO_MANIFEST_DIR"))
32+
.join("..")
33+
.join("..")
34+
.join("tests")
35+
.join("suite")
36+
.join("bash");
37+
38+
let transport =
39+
TokioChildProcess::new(Command::new(mcp_executable.get_program()).configure(|cmd| {
40+
cmd.arg("--bash").arg(bash);
41+
cmd.arg("--execve").arg(execve_wrapper.get_program());
42+
cmd.env("CODEX_HOME", codex_home.as_ref());
43+
44+
// Important: pipe stdio so rmcp can speak JSON-RPC over stdin/stdout
45+
cmd.stdin(Stdio::piped());
46+
cmd.stdout(Stdio::piped());
47+
48+
// Optional but very helpful while debugging:
49+
cmd.stderr(Stdio::inherit());
50+
}))?;
51+
52+
Ok(transport)
53+
}
54+
55+
pub async fn write_default_execpolicy<P>(policy: &str, codex_home: P) -> anyhow::Result<()>
56+
where
57+
P: AsRef<Path>,
58+
{
59+
let policy_dir = codex_home.as_ref().join("policy");
60+
tokio::fs::create_dir_all(&policy_dir).await?;
61+
tokio::fs::write(policy_dir.join("default.codexpolicy"), policy).await?;
62+
Ok(())
63+
}
64+
65+
pub async fn notify_readable_sandbox<P, S>(
66+
writable_folder: P,
67+
service: &RunningService<RoleClient, S>,
68+
) -> anyhow::Result<()>
69+
where
70+
P: AsRef<Path>,
71+
S: Service<RoleClient> + ClientHandler,
72+
{
73+
let sandbox_state = SandboxState {
74+
sandbox_policy: SandboxPolicy::ReadOnly,
75+
codex_linux_sandbox_exe: None,
76+
sandbox_cwd: writable_folder.as_ref().to_path_buf(),
77+
};
78+
send_sandbox_notification(sandbox_state, service).await
79+
}
80+
81+
pub async fn notify_writable_sandbox_only_one_folder<P, S>(
82+
writable_folder: P,
83+
service: &RunningService<RoleClient, S>,
84+
) -> anyhow::Result<()>
85+
where
86+
P: AsRef<Path>,
87+
S: Service<RoleClient> + ClientHandler,
88+
{
89+
let sandbox_state = SandboxState {
90+
sandbox_policy: SandboxPolicy::WorkspaceWrite {
91+
// Note that sandbox_cwd will already be included as a writable root
92+
// when the sandbox policy is expanded.
93+
writable_roots: vec![],
94+
network_access: false,
95+
// Disable writes to temp dir because this is a test, so
96+
// writable_folder is likely also under /tmp and we want to be
97+
// strict about what is writable.
98+
exclude_tmpdir_env_var: true,
99+
exclude_slash_tmp: true,
100+
},
101+
codex_linux_sandbox_exe: None,
102+
sandbox_cwd: writable_folder.as_ref().to_path_buf(),
103+
};
104+
send_sandbox_notification(sandbox_state, service).await
105+
}
106+
107+
async fn send_sandbox_notification<S>(
108+
sandbox_state: SandboxState,
109+
service: &RunningService<RoleClient, S>,
110+
) -> anyhow::Result<()>
111+
where
112+
S: Service<RoleClient> + ClientHandler,
113+
{
114+
let sandbox_state_notification = CustomClientNotification::new(
115+
MCP_SANDBOX_STATE_NOTIFICATION,
116+
Some(serde_json::to_value(sandbox_state)?),
117+
);
118+
service
119+
.send_notification(sandbox_state_notification.into())
120+
.await?;
121+
Ok(())
122+
}
123+
124+
pub struct InteractiveClient {
125+
pub elicitations_to_accept: HashSet<String>,
126+
pub elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParam>>>,
127+
}
128+
129+
impl ClientHandler for InteractiveClient {
130+
fn get_info(&self) -> ClientInfo {
131+
let capabilities = ClientCapabilities::builder().enable_elicitation().build();
132+
ClientInfo {
133+
capabilities,
134+
..Default::default()
135+
}
136+
}
137+
138+
fn create_elicitation(
139+
&self,
140+
request: CreateElicitationRequestParam,
141+
_context: rmcp::service::RequestContext<RoleClient>,
142+
) -> impl std::future::Future<Output = Result<CreateElicitationResult, McpError>> + Send + '_
143+
{
144+
self.elicitation_requests
145+
.lock()
146+
.unwrap()
147+
.push(request.clone());
148+
149+
let accept = self.elicitations_to_accept.contains(&request.message);
150+
async move {
151+
if accept {
152+
Ok(CreateElicitationResult {
153+
action: ElicitationAction::Accept,
154+
content: Some(json!({ "approve": true })),
155+
})
156+
} else {
157+
Ok(CreateElicitationResult {
158+
action: ElicitationAction::Decline,
159+
content: None,
160+
})
161+
}
162+
}
163+
}
164+
}
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#![allow(clippy::unwrap_used, clippy::expect_used)]
2+
use std::borrow::Cow;
3+
use std::sync::Arc;
4+
use std::sync::Mutex;
5+
6+
use anyhow::Result;
7+
use codex_exec_server::ExecResult;
8+
use exec_server_test_support::InteractiveClient;
9+
use exec_server_test_support::create_transport;
10+
use exec_server_test_support::notify_readable_sandbox;
11+
use exec_server_test_support::write_default_execpolicy;
12+
use maplit::hashset;
13+
use pretty_assertions::assert_eq;
14+
use rmcp::ServiceExt;
15+
use rmcp::model::CallToolRequestParam;
16+
use rmcp::model::CallToolResult;
17+
use rmcp::model::CreateElicitationRequestParam;
18+
use rmcp::model::object;
19+
use serde_json::json;
20+
use tempfile::TempDir;
21+
22+
/// Verify that when using a read-only sandbox and an execpolicy that prompts,
23+
/// the proper elicitation is sent. Upon auto-approving the elicitation, the
24+
/// command should be run privileged outside the sandbox.
25+
#[tokio::test(flavor = "current_thread")]
26+
async fn auto_approve() -> Result<()> {
27+
// Configure a stdio transport that will launch the MCP server using
28+
// $CODEX_HOME with an execpolicy that prompts for `git init` commands.
29+
let codex_home = TempDir::new()?;
30+
write_default_execpolicy(
31+
r#"
32+
# Create a rule with `decision = "prompt"` to exercise the elicitation flow.
33+
prefix_rule(
34+
pattern = ["git", "init"],
35+
decision = "prompt",
36+
match = [
37+
"git init ."
38+
],
39+
)
40+
"#,
41+
codex_home.as_ref(),
42+
)
43+
.await?;
44+
let transport = create_transport(codex_home.as_ref())?;
45+
46+
// Create an MCP client that approves expected elicitation messages.
47+
let project_root = TempDir::new()?;
48+
let git = which::which("git")?;
49+
let project_root_path = project_root.path().canonicalize().unwrap();
50+
let expected_elicitation_message = format!(
51+
"Allow agent to run `{} init .` in `{}`?",
52+
git.display(),
53+
project_root_path.display()
54+
);
55+
let elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParam>>> = Default::default();
56+
let client = InteractiveClient {
57+
elicitations_to_accept: hashset! { expected_elicitation_message.clone() },
58+
elicitation_requests: elicitation_requests.clone(),
59+
};
60+
61+
// Start the MCP server and notify it about the readable sandbox.
62+
let service = client.serve(transport).await?;
63+
notify_readable_sandbox(&project_root_path, &service).await?;
64+
65+
// Call the shell tool and verify that an elicitation was created and
66+
// auto-approved.
67+
let CallToolResult {
68+
content, is_error, ..
69+
} = service
70+
.call_tool(CallToolRequestParam {
71+
name: Cow::Borrowed("shell"),
72+
arguments: Some(object(json!(
73+
{
74+
"command": "git init .",
75+
"workdir": project_root_path.to_string_lossy(),
76+
}
77+
))),
78+
})
79+
.await?;
80+
let tool_call_content = content
81+
.first()
82+
.expect("expected non-empty content")
83+
.as_text()
84+
.expect("expected text content");
85+
let ExecResult {
86+
exit_code, output, ..
87+
} = serde_json::from_str::<ExecResult>(&tool_call_content.text)?;
88+
assert_eq!(
89+
output,
90+
format!(
91+
"Initialized empty Git repository in {}/.git/\n",
92+
project_root_path.display()
93+
)
94+
);
95+
assert_eq!(exit_code, 0, "command should succeed");
96+
assert_eq!(is_error, Some(false), "command should succeed");
97+
98+
let elicitation_messages = elicitation_requests
99+
.lock()
100+
.unwrap()
101+
.iter()
102+
.map(|r| r.message.clone())
103+
.collect::<Vec<_>>();
104+
assert_eq!(vec![expected_elicitation_message], elicitation_messages);
105+
106+
Ok(())
107+
}

0 commit comments

Comments
 (0)