fix: add test that verifies that codex-exec-mcp-server starts up

bolinfest · bolinfest · commit ce279a38982d · 2025-12-05T00:19:19.000-08:00
diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml
@@ -96,6 +96,7 @@ codex-utils-readiness = { path = "utils/readiness" }
 codex-utils-string = { path = "utils/string" }
 codex-windows-sandbox = { path = "windows-sandbox-rs" }
 core_test_support = { path = "core/tests/common" }
+exec_server_test_support = { path = "exec-server/tests/common" }
 mcp-types = { path = "mcp-types" }
 mcp_test_support = { path = "mcp-server/tests/common" }
 
@@ -178,8 +179,8 @@ seccompiler = "0.5.0"
 sentry = "0.34.0"
 serde = "1"
 serde_json = "1"
-serde_yaml = "0.9"
 serde_with = "3.16"
+serde_yaml = "0.9"
 serial_test = "3.2.0"
 sha1 = "0.10.6"
 sha2 = "0.10"
diff --git a/codex-rs/exec-server/Cargo.toml b/codex-rs/exec-server/Cargo.toml
@@ -1,8 +1,8 @@
 [package]
-name = "codex-exec-server"
-version.workspace = true
 edition.workspace = true
 license.workspace = true
+name = "codex-exec-server"
+version.workspace = true
 
 [[bin]]
 name = "codex-execve-wrapper"
@@ -56,5 +56,9 @@ tracing = { workspace = true }
 tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] }
 
 [dev-dependencies]
+assert_cmd = { workspace = true }
+exec_server_test_support = { workspace = true }
+maplit = { workspace = true }
 pretty_assertions = { workspace = true }
 tempfile = { workspace = true }
+which = { workspace = true }
diff --git a/codex-rs/exec-server/src/lib.rs b/codex-rs/exec-server/src/lib.rs
@@ -6,3 +6,6 @@ pub use posix::main_execve_wrapper;
 
 #[cfg(unix)]
 pub use posix::main_mcp_server;
+
+#[cfg(unix)]
+pub use posix::ExecResult;
diff --git a/codex-rs/exec-server/src/posix.rs b/codex-rs/exec-server/src/posix.rs
@@ -82,6 +82,8 @@ mod mcp_escalation_policy;
 mod socket;
 mod stopwatch;
 
+pub use mcp::ExecResult;
+
 /// Default value of --execve option relative to the current executable.
 /// Note this must match the name of the binary as specified in Cargo.toml.
 const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper";
diff --git a/codex-rs/exec-server/src/posix/mcp.rs b/codex-rs/exec-server/src/posix/mcp.rs
@@ -54,7 +54,7 @@ pub struct ExecParams {
     pub login: Option<bool>,
 }
 
-#[derive(Debug, serde::Serialize, schemars::JsonSchema)]
+#[derive(Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]
 pub struct ExecResult {
     pub exit_code: i32,
     pub output: String,
diff --git a/codex-rs/exec-server/tests/all.rs b/codex-rs/exec-server/tests/all.rs
@@ -0,0 +1,3 @@
+// Single integration test binary that aggregates all test modules.
+// The submodules live in `tests/suite/`.
+mod suite;
diff --git a/codex-rs/exec-server/tests/common/Cargo.toml b/codex-rs/exec-server/tests/common/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "exec_server_test_support"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[lib]
+path = "lib.rs"
+
+[dependencies]
+assert_cmd = { workspace = true }
+anyhow = { workspace = true }
+codex-core = { workspace = true }
+rmcp = { workspace = true }
+serde_json = { workspace = true }
+tokio = { workspace = true }
diff --git a/codex-rs/exec-server/tests/common/lib.rs b/codex-rs/exec-server/tests/common/lib.rs
@@ -0,0 +1,200 @@
+//! Shared helpers for the `codex-exec-mcp-server` integration tests.
+//!
+//! Provides a stdio transport that launches the server binary, helpers that
+//! send sandbox-state notifications, and an MCP client that records and
+//! answers elicitation requests.
+
+use codex_core::MCP_SANDBOX_STATE_NOTIFICATION;
+use codex_core::SandboxState;
+use codex_core::protocol::SandboxPolicy;
+use rmcp::ClientHandler;
+use rmcp::ErrorData as McpError;
+use rmcp::RoleClient;
+use rmcp::Service;
+use rmcp::model::ClientCapabilities;
+use rmcp::model::ClientInfo;
+use rmcp::model::CreateElicitationRequestParam;
+use rmcp::model::CreateElicitationResult;
+use rmcp::model::CustomClientNotification;
+use rmcp::model::ElicitationAction;
+use rmcp::service::RunningService;
+use rmcp::transport::ConfigureCommandExt;
+use rmcp::transport::TokioChildProcess;
+use serde_json::json;
+use std::collections::HashSet;
+use std::path::Path;
+use std::process::Stdio;
+use std::sync::Arc;
+use std::sync::Mutex;
+use tokio::process::Command;
+
+/// Builds a child-process transport that launches `codex-exec-mcp-server`.
+///
+/// The server is started with `--bash` pointing at
+/// `exec-server/tests/suite/bash`, `--execve` pointing at the
+/// `codex-execve-wrapper` binary, and `CODEX_HOME` set to `codex_home`.
+///
+/// # Errors
+///
+/// Returns an error if either binary cannot be located with
+/// `assert_cmd::Command::cargo_bin` or if the transport cannot be created.
+pub fn create_transport<P>(codex_home: P) -> anyhow::Result<TokioChildProcess>
+where
+    P: AsRef<Path>,
+{
+    let mcp_executable = assert_cmd::Command::cargo_bin("codex-exec-mcp-server")?;
+    let execve_wrapper = assert_cmd::Command::cargo_bin("codex-execve-wrapper")?;
+    // CARGO_MANIFEST_DIR is this crate (`exec-server/tests/common`), so go up
+    // two levels to `exec-server` before descending into `tests/suite`.
+    let bash = Path::new(env!("CARGO_MANIFEST_DIR"))
+        .join("..")
+        .join("..")
+        .join("tests")
+        .join("suite")
+        .join("bash");
+
+    let transport =
+        TokioChildProcess::new(Command::new(mcp_executable.get_program()).configure(|cmd| {
+            cmd.arg("--bash").arg(bash);
+            cmd.arg("--execve").arg(execve_wrapper.get_program());
+            cmd.env("CODEX_HOME", codex_home.as_ref());
+
+            // Important: pipe stdio so rmcp can speak JSON-RPC over stdin/stdout
+            cmd.stdin(Stdio::piped());
+            cmd.stdout(Stdio::piped());
+
+            // Optional but very helpful while debugging:
+            cmd.stderr(Stdio::inherit());
+        }))?;
+
+    Ok(transport)
+}
+
+/// Writes `policy` to `<codex_home>/policy/default.codexpolicy`, creating the
+/// `policy` directory (and any missing parents) first.
+pub async fn write_default_execpolicy<P>(policy: &str, codex_home: P) -> anyhow::Result<()>
+where
+    P: AsRef<Path>,
+{
+    let policy_dir = codex_home.as_ref().join("policy");
+    tokio::fs::create_dir_all(&policy_dir).await?;
+    tokio::fs::write(policy_dir.join("default.codexpolicy"), policy).await?;
+    Ok(())
+}
+
+/// Notifies the server of a `SandboxPolicy::ReadOnly` sandbox whose
+/// `sandbox_cwd` is `writable_folder`.
+///
+/// NOTE(review): despite its name, `writable_folder` is only used as the
+/// sandbox working directory here; the policy sent is read-only.
+pub async fn notify_readable_sandbox<P, S>(
+    writable_folder: P,
+    service: &RunningService<RoleClient, S>,
+) -> anyhow::Result<()>
+where
+    P: AsRef<Path>,
+    S: Service<RoleClient> + ClientHandler,
+{
+    let sandbox_state = SandboxState {
+        sandbox_policy: SandboxPolicy::ReadOnly,
+        codex_linux_sandbox_exe: None,
+        sandbox_cwd: writable_folder.as_ref().to_path_buf(),
+    };
+    send_sandbox_notification(sandbox_state, service).await
+}
+
+/// Notifies the server of a `SandboxPolicy::WorkspaceWrite` sandbox with
+/// `writable_folder` as `sandbox_cwd`, no extra writable roots, no network
+/// access, and both temp-dir exclusions enabled.
+pub async fn notify_writable_sandbox_only_one_folder<P, S>(
+    writable_folder: P,
+    service: &RunningService<RoleClient, S>,
+) -> anyhow::Result<()>
+where
+    P: AsRef<Path>,
+    S: Service<RoleClient> + ClientHandler,
+{
+    let sandbox_state = SandboxState {
+        sandbox_policy: SandboxPolicy::WorkspaceWrite {
+            // Note that sandbox_cwd will already be included as a writable root
+            // when the sandbox policy is expanded.
+            writable_roots: vec![],
+            network_access: false,
+            // Disable writes to temp dir because this is a test, so
+            // writable_folder is likely also under /tmp and we want to be
+            // strict about what is writable.
+            exclude_tmpdir_env_var: true,
+            exclude_slash_tmp: true,
+        },
+        codex_linux_sandbox_exe: None,
+        sandbox_cwd: writable_folder.as_ref().to_path_buf(),
+    };
+    send_sandbox_notification(sandbox_state, service).await
+}
+
+/// Serializes `sandbox_state` to JSON and sends it to the server as a custom
+/// client notification named `MCP_SANDBOX_STATE_NOTIFICATION`.
+async fn send_sandbox_notification<S>(
+    sandbox_state: SandboxState,
+    service: &RunningService<RoleClient, S>,
+) -> anyhow::Result<()>
+where
+    S: Service<RoleClient> + ClientHandler,
+{
+    let sandbox_state_notification = CustomClientNotification::new(
+        MCP_SANDBOX_STATE_NOTIFICATION,
+        Some(serde_json::to_value(sandbox_state)?),
+    );
+    service
+        .send_notification(sandbox_state_notification.into())
+        .await?;
+    Ok(())
+}
+
+/// MCP client for tests that records every elicitation request and accepts
+/// only those whose message is in an allow-list.
+pub struct InteractiveClient {
+    /// Exact elicitation messages to accept; any other message is declined.
+    pub elicitations_to_accept: HashSet<String>,
+    /// Every elicitation request received, in arrival order. Shared so a test
+    /// can inspect it after the client has been moved into the service.
+    pub elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParam>>>,
+}
+
+impl ClientHandler for InteractiveClient {
+    /// Advertises default client info with the elicitation capability enabled.
+    fn get_info(&self) -> ClientInfo {
+        let capabilities = ClientCapabilities::builder().enable_elicitation().build();
+        ClientInfo {
+            capabilities,
+            ..Default::default()
+        }
+    }
+
+    /// Records `request`, then accepts it with `{ "approve": true }` if its
+    /// message is in `elicitations_to_accept`, and declines it otherwise.
+    fn create_elicitation(
+        &self,
+        request: CreateElicitationRequestParam,
+        _context: rmcp::service::RequestContext<RoleClient>,
+    ) -> impl std::future::Future<Output = Result<CreateElicitationResult, McpError>> + Send + '_
+    {
+        // Decide before `request` is moved into the log so no clone is needed.
+        let accept = self.elicitations_to_accept.contains(&request.message);
+        self.elicitation_requests.lock().unwrap().push(request);
+
+        async move {
+            if accept {
+                Ok(CreateElicitationResult {
+                    action: ElicitationAction::Accept,
+                    content: Some(json!({ "approve": true })),
+                })
+            } else {
+                Ok(CreateElicitationResult {
+                    action: ElicitationAction::Decline,
+                    content: None,
+                })
+            }
+        }
+    }
+}
diff --git a/codex-rs/exec-server/tests/suite/auto_approve.rs b/codex-rs/exec-server/tests/suite/auto_approve.rs
@@ -0,0 +1,107 @@
+#![allow(clippy::unwrap_used, clippy::expect_used)]
+use std::borrow::Cow;
+use std::sync::Arc;
+use std::sync::Mutex;
+
+use anyhow::Result;
+use codex_exec_server::ExecResult;
+use exec_server_test_support::InteractiveClient;
+use exec_server_test_support::create_transport;
+use exec_server_test_support::notify_readable_sandbox;
+use exec_server_test_support::write_default_execpolicy;
+use maplit::hashset;
+use pretty_assertions::assert_eq;
+use rmcp::ServiceExt;
+use rmcp::model::CallToolRequestParam;
+use rmcp::model::CallToolResult;
+use rmcp::model::CreateElicitationRequestParam;
+use rmcp::model::object;
+use serde_json::json;
+use tempfile::TempDir;
+
+/// Verify that when using a read-only sandbox and an execpolicy that prompts,
+/// the proper elicitation is sent. Upon auto-approving the elicitation, the
+/// command should be run privileged outside the sandbox.
+#[tokio::test(flavor = "current_thread")]
+async fn auto_approve() -> Result<()> {
+    // Configure a stdio transport that will launch the MCP server using
+    // $CODEX_HOME with an execpolicy that prompts for `git init` commands.
+    let codex_home = TempDir::new()?;
+    write_default_execpolicy(
+        r#"
+# Create a rule with `decision = "prompt"` to exercise the elicitation flow.
+prefix_rule(
+  pattern = ["git", "init"],
+  decision = "prompt",
+  match = [
+    "git init ."
+  ],
+)
+"#,
+        codex_home.as_ref(),
+    )
+    .await?;
+    let transport = create_transport(codex_home.as_ref())?;
+
+    // Create an MCP client that approves expected elicitation messages.
+    let project_root = TempDir::new()?;
+    let git = which::which("git")?;
+    let project_root_path = project_root.path().canonicalize()?;
+    let expected_elicitation_message = format!(
+        "Allow agent to run `{} init .` in `{}`?",
+        git.display(),
+        project_root_path.display()
+    );
+    let elicitation_requests: Arc<Mutex<Vec<CreateElicitationRequestParam>>> = Default::default();
+    let client = InteractiveClient {
+        elicitations_to_accept: hashset! { expected_elicitation_message.clone() },
+        elicitation_requests: elicitation_requests.clone(),
+    };
+
+    // Start the MCP server and notify it about the read-only sandbox.
+    let service = client.serve(transport).await?;
+    notify_readable_sandbox(&project_root_path, &service).await?;
+
+    // Call the shell tool and verify that an elicitation was created and
+    // auto-approved.
+    let CallToolResult {
+        content, is_error, ..
+    } = service
+        .call_tool(CallToolRequestParam {
+            name: Cow::Borrowed("shell"),
+            arguments: Some(object(json!(
+                {
+                    "command": "git init .",
+                    "workdir": project_root_path.to_string_lossy(),
+                }
+            ))),
+        })
+        .await?;
+    let tool_call_content = content
+        .first()
+        .expect("expected non-empty content")
+        .as_text()
+        .expect("expected text content");
+    let ExecResult {
+        exit_code, output, ..
+    } = serde_json::from_str::<ExecResult>(&tool_call_content.text)?;
+    assert_eq!(
+        output,
+        format!(
+            "Initialized empty Git repository in {}/.git/\n",
+            project_root_path.display()
+        )
+    );
+    assert_eq!(exit_code, 0, "command should succeed");
+    assert_eq!(is_error, Some(false), "command should succeed");
+
+    let elicitation_messages = elicitation_requests
+        .lock()
+        .unwrap()
+        .iter()
+        .map(|r| r.message.clone())
+        .collect::<Vec<_>>();
+    assert_eq!(vec![expected_elicitation_message], elicitation_messages);
+
+    Ok(())
+}
diff --git a/codex-rs/exec-server/tests/suite/bash b/codex-rs/exec-server/tests/suite/bash
diff --git a/codex-rs/exec-server/tests/suite/list_tools.rs b/codex-rs/exec-server/tests/suite/list_tools.rs
diff --git a/codex-rs/exec-server/tests/suite/mod.rs b/codex-rs/exec-server/tests/suite/mod.rs

Original file line number	Diff line number	Diff line change
`@@ -54,7 +54,7 @@ pub struct ExecParams {`
`54`	`54`	`pub login: Option<bool>,`
`55`	`55`	`}`
`56`	`56`
`57`		`-#[derive(Debug, serde::Serialize, schemars::JsonSchema)]`
	`57`	`+#[derive(Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema)]`
`58`	`58`	`pub struct ExecResult {`
`59`	`59`	`pub exit_code: i32,`
`60`	`60`	`pub output: String,`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+// Single integration test binary that aggregates all test modules.`
	`2`	``+// The submodules live in `tests/suite/`.``
	`3`	`+mod suite;`