diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index 354b40392..0be45540c 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -407,6 +407,19 @@ jobs: brew upgrade brew --version + # Some integration tests rely on DotSlash being installed. + # See https://github.com/openai/codex/pull/7617. + - name: Install DotSlash + uses: facebook/install-dotslash@v2 + + - name: Pre-fetch DotSlash artifacts + # The Bash wrapper is not available on Windows. + if: ${{ !startsWith(matrix.runner, 'windows') }} + shell: bash + run: | + set -euo pipefail + dotslash -- fetch exec-server/tests/suite/bash + - uses: dtolnay/rust-toolchain@1.90 with: targets: ${{ matrix.target }} diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index b77cf01b0..ea1eec8f8 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1256,11 +1256,14 @@ name = "codex-exec-server" version = "0.0.0" dependencies = [ "anyhow", + "assert_cmd", "async-trait", "clap", "codex-core", "codex-execpolicy", + "exec_server_test_support", "libc", + "maplit", "path-absolutize", "pretty_assertions", "rmcp", @@ -1273,6 +1276,7 @@ dependencies = [ "tokio-util", "tracing", "tracing-subscriber", + "which", ] [[package]] @@ -2501,6 +2505,18 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "exec_server_test_support" +version = "0.0.0" +dependencies = [ + "anyhow", + "assert_cmd", + "codex-core", + "rmcp", + "serde_json", + "tokio", +] + [[package]] name = "eyre" version = "0.6.12" diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml index d3d3c36c3..0587ec1d4 100644 --- a/codex-rs/Cargo.toml +++ b/codex-rs/Cargo.toml @@ -96,6 +96,7 @@ codex-utils-readiness = { path = "utils/readiness" } codex-utils-string = { path = "utils/string" } codex-windows-sandbox = { path = "windows-sandbox-rs" } core_test_support = { path = "core/tests/common" } +exec_server_test_support = { path = "exec-server/tests/common" } mcp-types = { path = "mcp-types" } mcp_test_support = { path = "mcp-server/tests/common" } diff --git a/codex-rs/exec-server/Cargo.toml b/codex-rs/exec-server/Cargo.toml index ab6ca80a1..a0bd53493 100644 --- a/codex-rs/exec-server/Cargo.toml +++ b/codex-rs/exec-server/Cargo.toml @@ -56,5 +56,9 @@ tracing = { workspace = true } tracing-subscriber = { workspace = true, features = ["env-filter", "fmt"] } [dev-dependencies] +assert_cmd = { workspace = true } +exec_server_test_support = { workspace = true } +maplit = { workspace = true } pretty_assertions = { workspace = true } tempfile = { workspace = true } +which = { workspace = true } diff --git a/codex-rs/exec-server/src/lib.rs b/codex-rs/exec-server/src/lib.rs index adec09d4d..62f7bbccc 100644 --- a/codex-rs/exec-server/src/lib.rs +++ b/codex-rs/exec-server/src/lib.rs @@ -6,3 +6,6 @@ pub use posix::main_execve_wrapper; #[cfg(unix)] pub use posix::main_mcp_server; + +#[cfg(unix)] +pub use posix::ExecResult; diff --git a/codex-rs/exec-server/src/posix.rs b/codex-rs/exec-server/src/posix.rs index 16da5885f..1a4b0a0e1 100644 --- a/codex-rs/exec-server/src/posix.rs +++ b/codex-rs/exec-server/src/posix.rs @@ -82,6 +82,8 @@ mod mcp_escalation_policy; mod socket; mod stopwatch; +pub use mcp::ExecResult; + /// Default value of --execve option relative to the current executable. /// Note this must match the name of the binary as specified in Cargo.toml. const CODEX_EXECVE_WRAPPER_EXE_NAME: &str = "codex-execve-wrapper"; diff --git a/codex-rs/exec-server/src/posix/mcp.rs b/codex-rs/exec-server/src/posix/mcp.rs index bbbddc22e..1376d46b7 100644 --- a/codex-rs/exec-server/src/posix/mcp.rs +++ b/codex-rs/exec-server/src/posix/mcp.rs @@ -54,7 +54,7 @@ pub struct ExecParams { pub login: Option, } -#[derive(Debug, serde::Serialize, schemars::JsonSchema)] +#[derive(Debug, serde::Serialize, serde::Deserialize, schemars::JsonSchema)] pub struct ExecResult { pub exit_code: i32, pub output: String, diff --git a/codex-rs/exec-server/tests/all.rs b/codex-rs/exec-server/tests/all.rs new file mode 100644 index 000000000..7e136e4cc --- /dev/null +++ b/codex-rs/exec-server/tests/all.rs @@ -0,0 +1,3 @@ +// Single integration test binary that aggregates all test modules. +// The submodules live in `tests/suite/`. +mod suite; diff --git a/codex-rs/exec-server/tests/common/Cargo.toml b/codex-rs/exec-server/tests/common/Cargo.toml new file mode 100644 index 000000000..ba7d2af0a --- /dev/null +++ b/codex-rs/exec-server/tests/common/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "exec_server_test_support" +version.workspace = true +edition.workspace = true +license.workspace = true + +[lib] +path = "lib.rs" + +[dependencies] +assert_cmd = { workspace = true } +anyhow = { workspace = true } +codex-core = { workspace = true } +rmcp = { workspace = true } +serde_json = { workspace = true } +tokio = { workspace = true } diff --git a/codex-rs/exec-server/tests/common/lib.rs b/codex-rs/exec-server/tests/common/lib.rs new file mode 100644 index 000000000..c6df5c32c --- /dev/null +++ b/codex-rs/exec-server/tests/common/lib.rs @@ -0,0 +1,167 @@ +use codex_core::MCP_SANDBOX_STATE_NOTIFICATION; +use codex_core::SandboxState; +use codex_core::protocol::SandboxPolicy; +use rmcp::ClientHandler; +use rmcp::ErrorData as McpError; +use rmcp::RoleClient; +use rmcp::Service; +use rmcp::model::ClientCapabilities; +use rmcp::model::ClientInfo; +use rmcp::model::CreateElicitationRequestParam; +use rmcp::model::CreateElicitationResult; +use rmcp::model::CustomClientNotification; +use rmcp::model::ElicitationAction; +use rmcp::service::RunningService; +use rmcp::transport::ConfigureCommandExt; +use rmcp::transport::TokioChildProcess; +use serde_json::json; +use std::collections::HashSet; +use std::path::Path; +use std::path::PathBuf; +use std::process::Stdio; +use std::sync::Arc; +use std::sync::Mutex; +use tokio::process::Command; + +pub fn create_transport

(codex_home: P) -> anyhow::Result +where + P: AsRef, +{ + let mcp_executable = assert_cmd::Command::cargo_bin("codex-exec-mcp-server")?; + let execve_wrapper = assert_cmd::Command::cargo_bin("codex-execve-wrapper")?; + let bash = Path::new(env!("CARGO_MANIFEST_DIR")) + .join("..") + .join("..") + .join("tests") + .join("suite") + .join("bash"); + + let transport = + TokioChildProcess::new(Command::new(mcp_executable.get_program()).configure(|cmd| { + cmd.arg("--bash").arg(bash); + cmd.arg("--execve").arg(execve_wrapper.get_program()); + cmd.env("CODEX_HOME", codex_home.as_ref()); + + // Important: pipe stdio so rmcp can speak JSON-RPC over stdin/stdout + cmd.stdin(Stdio::piped()); + cmd.stdout(Stdio::piped()); + + // Optional but very helpful while debugging: + cmd.stderr(Stdio::inherit()); + }))?; + + Ok(transport) +} + +pub async fn write_default_execpolicy

(policy: &str, codex_home: P) -> anyhow::Result<()> +where + P: AsRef, +{ + let policy_dir = codex_home.as_ref().join("policy"); + tokio::fs::create_dir_all(&policy_dir).await?; + tokio::fs::write(policy_dir.join("default.codexpolicy"), policy).await?; + Ok(()) +} + +pub async fn notify_readable_sandbox( + sandbox_cwd: P, + codex_linux_sandbox_exe: Option, + service: &RunningService, +) -> anyhow::Result<()> +where + P: AsRef, + S: Service + ClientHandler, +{ + let sandbox_state = SandboxState { + sandbox_policy: SandboxPolicy::ReadOnly, + codex_linux_sandbox_exe, + sandbox_cwd: sandbox_cwd.as_ref().to_path_buf(), + }; + send_sandbox_notification(sandbox_state, service).await +} + +pub async fn notify_writable_sandbox_only_one_folder( + writable_folder: P, + codex_linux_sandbox_exe: Option, + service: &RunningService, +) -> anyhow::Result<()> +where + P: AsRef, + S: Service + ClientHandler, +{ + let sandbox_state = SandboxState { + sandbox_policy: SandboxPolicy::WorkspaceWrite { + // Note that sandbox_cwd will already be included as a writable root + // when the sandbox policy is expanded. + writable_roots: vec![], + network_access: false, + // Disable writes to temp dir because this is a test, so + // writable_folder is likely also under /tmp and we want to be + // strict about what is writable. + exclude_tmpdir_env_var: true, + exclude_slash_tmp: true, + }, + codex_linux_sandbox_exe, + sandbox_cwd: writable_folder.as_ref().to_path_buf(), + }; + send_sandbox_notification(sandbox_state, service).await +} + +async fn send_sandbox_notification( + sandbox_state: SandboxState, + service: &RunningService, +) -> anyhow::Result<()> +where + S: Service + ClientHandler, +{ + let sandbox_state_notification = CustomClientNotification::new( + MCP_SANDBOX_STATE_NOTIFICATION, + Some(serde_json::to_value(sandbox_state)?), + ); + service + .send_notification(sandbox_state_notification.into()) + .await?; + Ok(()) +} + +pub struct InteractiveClient { + pub elicitations_to_accept: HashSet, + pub elicitation_requests: Arc>>, +} + +impl ClientHandler for InteractiveClient { + fn get_info(&self) -> ClientInfo { + let capabilities = ClientCapabilities::builder().enable_elicitation().build(); + ClientInfo { + capabilities, + ..Default::default() + } + } + + fn create_elicitation( + &self, + request: CreateElicitationRequestParam, + _context: rmcp::service::RequestContext, + ) -> impl std::future::Future> + Send + '_ + { + self.elicitation_requests + .lock() + .unwrap() + .push(request.clone()); + + let accept = self.elicitations_to_accept.contains(&request.message); + async move { + if accept { + Ok(CreateElicitationResult { + action: ElicitationAction::Accept, + content: Some(json!({ "approve": true })), + }) + } else { + Ok(CreateElicitationResult { + action: ElicitationAction::Decline, + content: None, + }) + } + } + } +} diff --git a/codex-rs/exec-server/tests/suite/accept_elicitation.rs b/codex-rs/exec-server/tests/suite/accept_elicitation.rs new file mode 100644 index 000000000..2093f9a57 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/accept_elicitation.rs @@ -0,0 +1,131 @@ +#![allow(clippy::unwrap_used, clippy::expect_used)] +use std::borrow::Cow; +use std::sync::Arc; +use std::sync::Mutex; + +use anyhow::Result; +use codex_exec_server::ExecResult; +use exec_server_test_support::InteractiveClient; +use exec_server_test_support::create_transport; +use exec_server_test_support::notify_readable_sandbox; +use exec_server_test_support::write_default_execpolicy; +use maplit::hashset; +use pretty_assertions::assert_eq; +use rmcp::ServiceExt; +use rmcp::model::CallToolRequestParam; +use rmcp::model::CallToolResult; +use rmcp::model::CreateElicitationRequestParam; +use rmcp::model::object; +use serde_json::json; +use std::os::unix::fs::symlink; +use tempfile::TempDir; + +/// Verify that when using a read-only sandbox and an execpolicy that prompts, +/// the proper elicitation is sent. Upon auto-approving the elicitation, the +/// command should be run privileged outside the sandbox. +#[tokio::test(flavor = "current_thread")] +async fn accept_elicitation_for_prompt_rule() -> Result<()> { + // Configure a stdio transport that will launch the MCP server using + // $CODEX_HOME with an execpolicy that prompts for `git init` commands. + let codex_home = TempDir::new()?; + write_default_execpolicy( + r#" +# Create a rule with `decision = "prompt"` to exercise the elicitation flow. +prefix_rule( + pattern = ["git", "init"], + decision = "prompt", + match = [ + "git init ." + ], +) +"#, + codex_home.as_ref(), + ) + .await?; + let transport = create_transport(codex_home.as_ref())?; + + // Create an MCP client that approves expected elicitation messages. + let project_root = TempDir::new()?; + let git = which::which("git")?; + let project_root_path = project_root.path().canonicalize().unwrap(); + let expected_elicitation_message = format!( + "Allow agent to run `{} init .` in `{}`?", + git.display(), + project_root_path.display() + ); + let elicitation_requests: Arc>> = Default::default(); + let client = InteractiveClient { + elicitations_to_accept: hashset! { expected_elicitation_message.clone() }, + elicitation_requests: elicitation_requests.clone(), + }; + + // Start the MCP server. + let service: rmcp::service::RunningService = + client.serve(transport).await?; + + // Notify the MCP server about the current sandbox state before making any + // `shell` tool calls. + let linux_sandbox_exe_folder = TempDir::new()?; + let codex_linux_sandbox_exe = if cfg!(target_os = "linux") { + let codex_linux_sandbox_exe = linux_sandbox_exe_folder.path().join("codex-linux-sandbox"); + let codex_cli = assert_cmd::Command::cargo_bin("codex")? + .get_program() + .to_os_string(); + let codex_cli_path = std::path::PathBuf::from(codex_cli); + symlink(&codex_cli_path, &codex_linux_sandbox_exe)?; + Some(codex_linux_sandbox_exe) + } else { + None + }; + notify_readable_sandbox(&project_root_path, codex_linux_sandbox_exe, &service).await?; + + // Call the shell tool and verify that an elicitation was created and + // auto-approved. + let CallToolResult { + content, is_error, .. + } = service + .call_tool(CallToolRequestParam { + name: Cow::Borrowed("shell"), + arguments: Some(object(json!( + { + "command": "git init .", + "workdir": project_root_path.to_string_lossy(), + } + ))), + }) + .await?; + let tool_call_content = content + .first() + .expect("expected non-empty content") + .as_text() + .expect("expected text content"); + let ExecResult { + exit_code, output, .. + } = serde_json::from_str::(&tool_call_content.text)?; + let git_init_succeeded = format!( + "Initialized empty Git repository in {}/.git/\n", + project_root_path.display() + ); + // Normally, this would be an exact match, but it might include extra output + // if `git config set advice.defaultBranchName false` has not been set. + assert!( + output.contains(&git_init_succeeded), + "expected output `{output}` to contain `{git_init_succeeded}`" + ); + assert_eq!(exit_code, 0, "command should succeed"); + assert_eq!(is_error, Some(false), "command should succeed"); + assert!( + project_root_path.join(".git").is_dir(), + "git repo should exist" + ); + + let elicitation_messages = elicitation_requests + .lock() + .unwrap() + .iter() + .map(|r| r.message.clone()) + .collect::>(); + assert_eq!(vec![expected_elicitation_message], elicitation_messages); + + Ok(()) +} diff --git a/codex-rs/exec-server/tests/suite/bash b/codex-rs/exec-server/tests/suite/bash new file mode 100755 index 000000000..5f5d1e559 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/bash @@ -0,0 +1,75 @@ +#!/usr/bin/env dotslash + +// This is an instance of the fork of Bash that we bundle with +// https://www.npmjs.com/package/@openai/codex-shell-tool-mcp. +// Fetching the prebuilt version via DotSlash makes it easier to write +// integration tests for the MCP server. +// +// TODO(mbolin): Currently, we use a .tgz artifact that includes binaries for +// multiple platforms, but we could save a bit of space by making arch-specific +// artifacts available in the GitHub releases and referencing those here. +{ + "name": "codex-bash", + "platforms": { + // macOS 13 builds (and therefore x86_64) were dropped in + // https://github.com/openai/codex/pull/7295, so we only provide an + // Apple Silicon build for now. + "macos-aarch64": { + "size": 37003612, + "hash": "blake3", + "digest": "d9cd5928c993b65c340507931c61c02bd6e9179933f8bf26a548482bb5fa53bb", + "format": "tar.gz", + "path": "package/vendor/aarch64-apple-darwin/bash/macos-15/bash", + "providers": [ + { + "url": "https://github.com/openai/codex/releases/download/rust-v0.65.0/codex-shell-tool-mcp-npm-0.65.0.tgz" + }, + { + "type": "github-release", + "repo": "openai/codex", + "tag": "rust-v0.65.0", + "name": "codex-shell-tool-mcp-npm-0.65.0.tgz" + } + ] + }, + // Note the `musl` parts of the Linux paths are misleading: the Bash + // binaries are actually linked against `glibc`, but the + // `codex-execve-wrapper` that invokes them is linked against `musl`. + "linux-x86_64": { + "size": 37003612, + "hash": "blake3", + "digest": "d9cd5928c993b65c340507931c61c02bd6e9179933f8bf26a548482bb5fa53bb", + "format": "tar.gz", + "path": "package/vendor/x86_64-unknown-linux-musl/bash/ubuntu-24.04/bash", + "providers": [ + { + "url": "https://github.com/openai/codex/releases/download/rust-v0.65.0/codex-shell-tool-mcp-npm-0.65.0.tgz" + }, + { + "type": "github-release", + "repo": "openai/codex", + "tag": "rust-v0.65.0", + "name": "codex-shell-tool-mcp-npm-0.65.0.tgz" + } + ] + }, + "linux-aarch64": { + "size": 37003612, + "hash": "blake3", + "digest": "d9cd5928c993b65c340507931c61c02bd6e9179933f8bf26a548482bb5fa53bb", + "format": "tar.gz", + "path": "package/vendor/aarch64-unknown-linux-musl/bash/ubuntu-24.04/bash", + "providers": [ + { + "url": "https://github.com/openai/codex/releases/download/rust-v0.65.0/codex-shell-tool-mcp-npm-0.65.0.tgz" + }, + { + "type": "github-release", + "repo": "openai/codex", + "tag": "rust-v0.65.0", + "name": "codex-shell-tool-mcp-npm-0.65.0.tgz" + } + ] + }, + } +} diff --git a/codex-rs/exec-server/tests/suite/list_tools.rs b/codex-rs/exec-server/tests/suite/list_tools.rs new file mode 100644 index 000000000..17505c761 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/list_tools.rs @@ -0,0 +1,76 @@ +#![allow(clippy::unwrap_used, clippy::expect_used)] +use std::borrow::Cow; +use std::fs; +use std::sync::Arc; + +use anyhow::Result; +use exec_server_test_support::create_transport; +use pretty_assertions::assert_eq; +use rmcp::ServiceExt; +use rmcp::model::Tool; +use rmcp::model::object; +use serde_json::json; +use tempfile::TempDir; + +/// Verify the list_tools call to the MCP server returns the expected response. +#[tokio::test(flavor = "current_thread")] +async fn list_tools() -> Result<()> { + let codex_home = TempDir::new()?; + let policy_dir = codex_home.path().join("policy"); + fs::create_dir_all(&policy_dir)?; + fs::write( + policy_dir.join("default.codexpolicy"), + r#"prefix_rule(pattern=["ls"], decision="prompt")"#, + )?; + let transport = create_transport(codex_home.path())?; + + let service = ().serve(transport).await?; + let tools = service.list_tools(Default::default()).await?.tools; + assert_eq!( + vec![Tool { + name: Cow::Borrowed("shell"), + title: None, + description: Some(Cow::Borrowed( + "Runs a shell command and returns its output. You MUST provide the workdir as an absolute path." + )), + input_schema: Arc::new(object(json!({ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "properties": { + "command": { + "description": "The bash string to execute.", + "type": "string", + }, + "login": { + "description": "Launch Bash with -lc instead of -c: defaults to true.", + "nullable": true, + "type": "boolean", + }, + "timeout_ms": { + "description": "The timeout for the command in milliseconds.", + "format": "uint64", + "minimum": 0, + "nullable": true, + "type": "integer", + }, + "workdir": { + "description": "The working directory to execute the command in. Must be an absolute path.", + "type": "string", + }, + }, + "required": [ + "command", + "workdir", + ], + "title": "ExecParams", + "type": "object", + }))), + output_schema: None, + annotations: None, + icons: None, + meta: None + }], + tools + ); + + Ok(()) +} diff --git a/codex-rs/exec-server/tests/suite/mod.rs b/codex-rs/exec-server/tests/suite/mod.rs new file mode 100644 index 000000000..3a94f5857 --- /dev/null +++ b/codex-rs/exec-server/tests/suite/mod.rs @@ -0,0 +1,8 @@ +// TODO(mbolin): Get this test working on Linux. Currently, it fails with: +// +// > Error: Mcp error: -32603: sandbox error: sandbox denied exec error, +// > exit code: 1, stdout: , stderr: Error: failed to send handshake datagram +#[cfg(all(target_os = "macos", target_arch = "aarch64"))] +mod accept_elicitation; +#[cfg(any(all(target_os = "macos", target_arch = "aarch64"), target_os = "linux"))] +mod list_tools;