From 807e2c27f0a9f2e85c50e7e6df5533f0d9b853c7 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Tue, 11 Nov 2025 08:19:35 -0800 Subject: [PATCH] Add unified exec escalation handling and tests (#6492) Similar implementation to the shell tool --- codex-rs/Cargo.lock | 10 ++-- codex-rs/core/src/codex.rs | 43 +++++++++++++++ .../core/src/tools/handlers/unified_exec.rs | 44 ++++++++++++--- .../core/src/tools/runtimes/unified_exec.rs | 47 +++++++++++++--- codex-rs/core/src/tools/spec.rs | 18 +++++++ codex-rs/core/src/unified_exec/mod.rs | 4 ++ .../core/src/unified_exec/session_manager.rs | 12 ++++- codex-rs/core/tests/suite/approvals.rs | 53 +++++++++++++++++-- 8 files changed, 208 insertions(+), 23 deletions(-) diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index cf5f961a6..8794f54af 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -4450,7 +4450,7 @@ checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1" dependencies = [ "base64", "indexmap 2.12.0", - "quick-xml", + "quick-xml 0.38.0", "serde", "time", ] @@ -7093,7 +7093,7 @@ version = "0.31.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c66a47e840dc20793f2264eb4b3e4ecb4b75d91c0dd4af04b456128e0bdd449d" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.10.0", "rustix 1.0.8", "wayland-backend", "wayland-scanner", @@ -7105,7 +7105,7 @@ version = "0.32.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efa790ed75fbfd71283bd2521a1cfdc022aabcc28bdcff00851f9e4ae88d9901" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.10.0", "wayland-backend", "wayland-client", "wayland-scanner", @@ -7117,7 +7117,7 @@ version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efd94963ed43cf9938a090ca4f7da58eb55325ec8200c3848963e98dc25b78ec" dependencies = [ - "bitflags 2.9.1", + "bitflags 2.10.0", "wayland-backend", "wayland-client", "wayland-protocols", @@ -7726,7 +7726,7 @@ dependencies = [ "os_pipe", "rustix 0.38.44", "tempfile", - "thiserror 2.0.16", + "thiserror 2.0.17", "tree_magic_mini", "wayland-backend", "wayland-client", diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index d28f671f1..405e6ca3d 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2323,6 +2323,7 @@ mod tests { use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; use crate::tools::handlers::ShellHandler; + use crate::tools::handlers::UnifiedExecHandler; use crate::tools::registry::ToolHandler; use crate::turn_diff_tracker::TurnDiffTracker; use codex_app_server_protocol::AuthMode; @@ -3062,6 +3063,48 @@ mod tests { assert!(exec_output.output.contains("hi")); } + #[tokio::test] + async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() { + use crate::protocol::AskForApproval; + use crate::turn_diff_tracker::TurnDiffTracker; + + let (session, mut turn_context_raw) = make_session_and_context(); + turn_context_raw.approval_policy = AskForApproval::OnFailure; + let session = Arc::new(session); + let turn_context = Arc::new(turn_context_raw); + let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new())); + + let handler = UnifiedExecHandler; + let resp = handler + .handle(ToolInvocation { + session: Arc::clone(&session), + turn: Arc::clone(&turn_context), + tracker: Arc::clone(&tracker), + call_id: "exec-call".to_string(), + tool_name: "exec_command".to_string(), + payload: ToolPayload::Function { + arguments: serde_json::json!({ + "cmd": "echo hi", + "with_escalated_permissions": true, + "justification": "need unsandboxed execution", + }) + .to_string(), + }, + }) + .await; + + let Err(FunctionCallError::RespondToModel(output)) = resp else { + panic!("expected error result"); + }; + + let expected = format!( + "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}", + policy = turn_context.approval_policy + ); + + pretty_assertions::assert_eq!(output, expected); + } + #[test] fn mcp_init_error_display_prompts_for_github_pat() { let server_name = "github"; diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index 6542c333c..6673cf9ba 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -36,6 +36,10 @@ struct ExecCommandArgs { yield_time_ms: Option, #[serde(default)] max_output_tokens: Option, + #[serde(default)] + with_escalated_permissions: Option, + #[serde(default)] + justification: Option, } #[derive(Debug, Deserialize)] @@ -100,8 +104,30 @@ impl ToolHandler for UnifiedExecHandler { "failed to parse exec_command arguments: {err:?}" )) })?; - let workdir = args - .workdir + let ExecCommandArgs { + cmd, + workdir, + shell, + login, + yield_time_ms, + max_output_tokens, + with_escalated_permissions, + justification, + } = args; + + if with_escalated_permissions.unwrap_or(false) + && !matches!( + context.turn.approval_policy, + codex_protocol::protocol::AskForApproval::OnRequest + ) + { + return Err(FunctionCallError::RespondToModel(format!( + "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}", + policy = context.turn.approval_policy + ))); + } + + let workdir = workdir .as_deref() .filter(|value| !value.is_empty()) .map(PathBuf::from); @@ -113,18 +139,20 @@ impl ToolHandler for UnifiedExecHandler { &context.call_id, None, ); - let emitter = ToolEmitter::unified_exec(args.cmd.clone(), cwd.clone(), true); + let emitter = ToolEmitter::unified_exec(cmd.clone(), cwd.clone(), true); emitter.emit(event_ctx, ToolEventStage::Begin).await; manager .exec_command( ExecCommandRequest { - command: &args.cmd, - shell: &args.shell, - login: args.login, - yield_time_ms: args.yield_time_ms, - max_output_tokens: args.max_output_tokens, + command: &cmd, + shell: &shell, + login, + yield_time_ms, + max_output_tokens, workdir, + with_escalated_permissions, + justification, }, &context, ) diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index 47157587b..cddac1924 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -34,6 +34,8 @@ pub struct UnifiedExecRequest { pub command: Vec, pub cwd: PathBuf, pub env: HashMap, + pub with_escalated_permissions: Option, + pub justification: Option, } impl ProvidesSandboxRetryData for UnifiedExecRequest { @@ -49,6 +51,7 @@ impl ProvidesSandboxRetryData for UnifiedExecRequest { pub struct UnifiedExecApprovalKey { pub command: Vec, pub cwd: PathBuf, + pub escalated: bool, } pub struct UnifiedExecRuntime<'a> { @@ -56,8 +59,20 @@ pub struct UnifiedExecRuntime<'a> { } impl UnifiedExecRequest { - pub fn new(command: Vec, cwd: PathBuf, env: HashMap) -> Self { - Self { command, cwd, env } + pub fn new( + command: Vec, + cwd: PathBuf, + env: HashMap, + with_escalated_permissions: Option, + justification: Option, + ) -> Self { + Self { + command, + cwd, + env, + with_escalated_permissions, + justification, + } } } @@ -84,6 +99,7 @@ impl Approvable for UnifiedExecRuntime<'_> { UnifiedExecApprovalKey { command: req.command.clone(), cwd: req.cwd.clone(), + escalated: req.with_escalated_permissions.unwrap_or(false), } } @@ -98,7 +114,10 @@ impl Approvable for UnifiedExecRuntime<'_> { let call_id = ctx.call_id.to_string(); let command = req.command.clone(); let cwd = req.cwd.clone(); - let reason = ctx.retry_reason.clone(); + let reason = ctx + .retry_reason + .clone() + .or_else(|| req.justification.clone()); let risk = ctx.risk.clone(); Box::pin(async move { with_cached_approval(&session.services, key, || async move { @@ -116,7 +135,16 @@ impl Approvable for UnifiedExecRuntime<'_> { policy: AskForApproval, sandbox_policy: &SandboxPolicy, ) -> bool { - requires_initial_appoval(policy, sandbox_policy, &req.command, false) + requires_initial_appoval( + policy, + sandbox_policy, + &req.command, + req.with_escalated_permissions.unwrap_or(false), + ) + } + + fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool { + req.with_escalated_permissions.unwrap_or(false) } } @@ -127,8 +155,15 @@ impl<'a> ToolRuntime for UnifiedExecRunt attempt: &SandboxAttempt<'_>, _ctx: &ToolCtx<'_>, ) -> Result { - let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None) - .map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?; + let spec = build_command_spec( + &req.command, + &req.cwd, + &req.env, + None, + req.with_escalated_permissions, + req.justification.clone(), + ) + .map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?; let exec_env = attempt .env_for(&spec) .map_err(|err| ToolError::Codex(err.into()))?; diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index cb1aeafdb..471f42c08 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -177,6 +177,24 @@ fn create_exec_command_tool() -> ToolSpec { ), }, ); + properties.insert( + "with_escalated_permissions".to_string(), + JsonSchema::Boolean { + description: Some( + "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions" + .to_string(), + ), + }, + ); + properties.insert( + "justification".to_string(), + JsonSchema::String { + description: Some( + "Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command." + .to_string(), + ), + }, + ); ToolSpec::Function(ResponsesApiTool { name: "exec_command".to_string(), diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index 16fbc4c7f..b996ce79e 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -71,6 +71,8 @@ pub(crate) struct ExecCommandRequest<'a> { pub yield_time_ms: Option, pub max_output_tokens: Option, pub workdir: Option, + pub with_escalated_permissions: Option, + pub justification: Option, } #[derive(Debug)] @@ -201,6 +203,8 @@ mod tests { yield_time_ms, max_output_tokens: None, workdir: None, + with_escalated_permissions: None, + justification: None, }, &context, ) diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 55a5f589a..c9763fbc4 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -51,7 +51,13 @@ impl UnifiedExecSessionManager { ]; let session = self - .open_session_with_sandbox(command, cwd.clone(), context) + .open_session_with_sandbox( + command, + cwd.clone(), + request.with_escalated_permissions, + request.justification, + context, + ) .await?; let max_tokens = resolve_max_tokens(request.max_output_tokens); @@ -317,6 +323,8 @@ impl UnifiedExecSessionManager { &self, command: Vec, cwd: PathBuf, + with_escalated_permissions: Option, + justification: Option, context: &UnifiedExecContext, ) -> Result { let mut orchestrator = ToolOrchestrator::new(); @@ -325,6 +333,8 @@ impl UnifiedExecSessionManager { command, cwd, create_env(&context.turn.shell_environment_policy), + with_escalated_permissions, + justification, ); let tool_ctx = ToolCtx { session: context.session.as_ref(), diff --git a/codex-rs/core/tests/suite/approvals.rs b/codex-rs/core/tests/suite/approvals.rs index 68407367e..8316d4de5 100644 --- a/codex-rs/core/tests/suite/approvals.rs +++ b/codex-rs/core/tests/suite/approvals.rs @@ -75,6 +75,7 @@ enum ActionKind { }, RunUnifiedExecCommand { command: &'static str, + justification: Option<&'static str>, }, ApplyPatchFunction { target: TargetPath, @@ -86,6 +87,9 @@ enum ActionKind { }, } +const DEFAULT_UNIFIED_EXEC_JUSTIFICATION: &str = + "Requires escalated permissions to bypass the sandbox in tests."; + impl ActionKind { async fn prepare( &self, @@ -139,8 +143,17 @@ impl ActionKind { let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?; Ok((event, Some(command))) } - ActionKind::RunUnifiedExecCommand { command } => { - let event = exec_command_event(call_id, command, Some(1000))?; + ActionKind::RunUnifiedExecCommand { + command, + justification, + } => { + let event = exec_command_event( + call_id, + command, + Some(1000), + with_escalated_permissions, + *justification, + )?; Ok(( event, Some(vec![ @@ -199,13 +212,24 @@ fn shell_event( Ok(ev_function_call(call_id, "shell", &args_str)) } -fn exec_command_event(call_id: &str, cmd: &str, yield_time_ms: Option) -> Result { +fn exec_command_event( + call_id: &str, + cmd: &str, + yield_time_ms: Option, + with_escalated_permissions: bool, + justification: Option<&str>, +) -> Result { let mut args = json!({ "cmd": cmd.to_string(), }); if let Some(yield_time_ms) = yield_time_ms { args["yield_time_ms"] = json!(yield_time_ms); } + if with_escalated_permissions { + args["with_escalated_permissions"] = json!(true); + let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION); + args["justification"] = json!(reason); + } let args_str = serde_json::to_string(&args)?; Ok(ev_function_call(call_id, "exec_command", &args_str)) } @@ -1109,6 +1133,7 @@ fn scenarios() -> Vec { sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::RunUnifiedExecCommand { command: "echo \"hello unified exec\"", + justification: None, }, with_escalated_permissions: false, features: vec![Feature::UnifiedExec], @@ -1118,12 +1143,34 @@ fn scenarios() -> Vec { stdout_contains: "hello unified exec", }, }, + #[cfg(not(all(target_os = "linux", target_arch = "aarch64")))] + // Linux sandbox arg0 test workaround doesn't work on ARM + ScenarioSpec { + name: "unified exec on request escalated requires approval", + approval_policy: OnRequest, + sandbox_policy: SandboxPolicy::ReadOnly, + action: ActionKind::RunUnifiedExecCommand { + command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'", + justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION), + }, + with_escalated_permissions: true, + features: vec![Feature::UnifiedExec], + model_override: None, + outcome: Outcome::ExecApproval { + decision: ReviewDecision::Approved, + expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION), + }, + expectation: Expectation::CommandSuccess { + stdout_contains: "escalated unified exec", + }, + }, ScenarioSpec { name: "unified exec on request requires approval unless trusted", approval_policy: AskForApproval::UnlessTrusted, sandbox_policy: SandboxPolicy::DangerFullAccess, action: ActionKind::RunUnifiedExecCommand { command: "git reset --hard", + justification: None, }, with_escalated_permissions: false, features: vec![Feature::UnifiedExec],