From 807e2c27f0a9f2e85c50e7e6df5533f0d9b853c7 Mon Sep 17 00:00:00 2001
From: pakrym-oai <pakrym@openai.com>
Date: Tue, 11 Nov 2025 08:19:35 -0800
Subject: [PATCH] Add unified exec escalation handling and tests (#6492)

Similar implementation to the shell tool
---
 codex-rs/Cargo.lock                           | 10 ++--
 codex-rs/core/src/codex.rs                    | 43 +++++++++++++++
 .../core/src/tools/handlers/unified_exec.rs   | 44 ++++++++++++---
 .../core/src/tools/runtimes/unified_exec.rs   | 47 +++++++++++++---
 codex-rs/core/src/tools/spec.rs               | 18 +++++++
 codex-rs/core/src/unified_exec/mod.rs         |  4 ++
 .../core/src/unified_exec/session_manager.rs  | 12 ++++-
 codex-rs/core/tests/suite/approvals.rs        | 53 +++++++++++++++++--
 8 files changed, 208 insertions(+), 23 deletions(-)

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index cf5f961a6..8794f54af 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -4450,7 +4450,7 @@ checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
 dependencies = [
  "base64",
  "indexmap 2.12.0",
- "quick-xml",
+ "quick-xml 0.38.0",
  "serde",
  "time",
 ]
@@ -7093,7 +7093,7 @@ version = "0.31.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c66a47e840dc20793f2264eb4b3e4ecb4b75d91c0dd4af04b456128e0bdd449d"
 dependencies = [
- "bitflags 2.9.1",
+ "bitflags 2.10.0",
  "rustix 1.0.8",
  "wayland-backend",
  "wayland-scanner",
@@ -7105,7 +7105,7 @@ version = "0.32.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "efa790ed75fbfd71283bd2521a1cfdc022aabcc28bdcff00851f9e4ae88d9901"
 dependencies = [
- "bitflags 2.9.1",
+ "bitflags 2.10.0",
  "wayland-backend",
  "wayland-client",
  "wayland-scanner",
@@ -7117,7 +7117,7 @@ version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "efd94963ed43cf9938a090ca4f7da58eb55325ec8200c3848963e98dc25b78ec"
 dependencies = [
- "bitflags 2.9.1",
+ "bitflags 2.10.0",
  "wayland-backend",
  "wayland-client",
  "wayland-protocols",
@@ -7726,7 +7726,7 @@ dependencies = [
  "os_pipe",
  "rustix 0.38.44",
  "tempfile",
- "thiserror 2.0.16",
+ "thiserror 2.0.17",
  "tree_magic_mini",
  "wayland-backend",
  "wayland-client",
diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs
index d28f671f1..405e6ca3d 100644
--- a/codex-rs/core/src/codex.rs
+++ b/codex-rs/core/src/codex.rs
@@ -2323,6 +2323,7 @@ mod tests {
     use crate::tools::context::ToolOutput;
     use crate::tools::context::ToolPayload;
     use crate::tools::handlers::ShellHandler;
+    use crate::tools::handlers::UnifiedExecHandler;
     use crate::tools::registry::ToolHandler;
     use crate::turn_diff_tracker::TurnDiffTracker;
     use codex_app_server_protocol::AuthMode;
@@ -3062,6 +3063,48 @@ mod tests {
         assert!(exec_output.output.contains("hi"));
     }
 
+    #[tokio::test]
+    async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
+        use crate::protocol::AskForApproval;
+        use crate::turn_diff_tracker::TurnDiffTracker;
+
+        let (session, mut turn_context_raw) = make_session_and_context();
+        turn_context_raw.approval_policy = AskForApproval::OnFailure;
+        let session = Arc::new(session);
+        let turn_context = Arc::new(turn_context_raw);
+        let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
+
+        let handler = UnifiedExecHandler;
+        let resp = handler
+            .handle(ToolInvocation {
+                session: Arc::clone(&session),
+                turn: Arc::clone(&turn_context),
+                tracker: Arc::clone(&tracker),
+                call_id: "exec-call".to_string(),
+                tool_name: "exec_command".to_string(),
+                payload: ToolPayload::Function {
+                    arguments: serde_json::json!({
+                        "cmd": "echo hi",
+                        "with_escalated_permissions": true,
+                        "justification": "need unsandboxed execution",
+                    })
+                    .to_string(),
+                },
+            })
+            .await;
+
+        let Err(FunctionCallError::RespondToModel(output)) = resp else {
+            panic!("expected error result");
+        };
+
+        let expected = format!(
+            "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
+            policy = turn_context.approval_policy
+        );
+
+        pretty_assertions::assert_eq!(output, expected);
+    }
+
     #[test]
     fn mcp_init_error_display_prompts_for_github_pat() {
         let server_name = "github";
diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs
index 6542c333c..6673cf9ba 100644
--- a/codex-rs/core/src/tools/handlers/unified_exec.rs
+++ b/codex-rs/core/src/tools/handlers/unified_exec.rs
@@ -36,6 +36,10 @@ struct ExecCommandArgs {
     yield_time_ms: Option<u64>,
     #[serde(default)]
     max_output_tokens: Option<usize>,
+    #[serde(default)]
+    with_escalated_permissions: Option<bool>,
+    #[serde(default)]
+    justification: Option<String>,
 }
 
 #[derive(Debug, Deserialize)]
@@ -100,8 +104,30 @@ impl ToolHandler for UnifiedExecHandler {
                         "failed to parse exec_command arguments: {err:?}"
                     ))
                 })?;
-                let workdir = args
-                    .workdir
+                let ExecCommandArgs {
+                    cmd,
+                    workdir,
+                    shell,
+                    login,
+                    yield_time_ms,
+                    max_output_tokens,
+                    with_escalated_permissions,
+                    justification,
+                } = args;
+
+                if with_escalated_permissions.unwrap_or(false)
+                    && !matches!(
+                        context.turn.approval_policy,
+                        codex_protocol::protocol::AskForApproval::OnRequest
+                    )
+                {
+                    return Err(FunctionCallError::RespondToModel(format!(
+                        "approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
+                        policy = context.turn.approval_policy
+                    )));
+                }
+
+                let workdir = workdir
                     .as_deref()
                     .filter(|value| !value.is_empty())
                     .map(PathBuf::from);
@@ -113,18 +139,20 @@ impl ToolHandler for UnifiedExecHandler {
                     &context.call_id,
                     None,
                 );
-                let emitter = ToolEmitter::unified_exec(args.cmd.clone(), cwd.clone(), true);
+                let emitter = ToolEmitter::unified_exec(cmd.clone(), cwd.clone(), true);
                 emitter.emit(event_ctx, ToolEventStage::Begin).await;
 
                 manager
                     .exec_command(
                         ExecCommandRequest {
-                            command: &args.cmd,
-                            shell: &args.shell,
-                            login: args.login,
-                            yield_time_ms: args.yield_time_ms,
-                            max_output_tokens: args.max_output_tokens,
+                            command: &cmd,
+                            shell: &shell,
+                            login,
+                            yield_time_ms,
+                            max_output_tokens,
                             workdir,
+                            with_escalated_permissions,
+                            justification,
                         },
                         &context,
                     )
diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs
index 47157587b..cddac1924 100644
--- a/codex-rs/core/src/tools/runtimes/unified_exec.rs
+++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs
@@ -34,6 +34,8 @@ pub struct UnifiedExecRequest {
     pub command: Vec<String>,
     pub cwd: PathBuf,
     pub env: HashMap<String, String>,
+    pub with_escalated_permissions: Option<bool>,
+    pub justification: Option<String>,
 }
 
 impl ProvidesSandboxRetryData for UnifiedExecRequest {
@@ -49,6 +51,7 @@ impl ProvidesSandboxRetryData for UnifiedExecRequest {
 pub struct UnifiedExecApprovalKey {
     pub command: Vec<String>,
     pub cwd: PathBuf,
+    pub escalated: bool,
 }
 
 pub struct UnifiedExecRuntime<'a> {
@@ -56,8 +59,20 @@ pub struct UnifiedExecRuntime<'a> {
 }
 
 impl UnifiedExecRequest {
-    pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
-        Self { command, cwd, env }
+    pub fn new(
+        command: Vec<String>,
+        cwd: PathBuf,
+        env: HashMap<String, String>,
+        with_escalated_permissions: Option<bool>,
+        justification: Option<String>,
+    ) -> Self {
+        Self {
+            command,
+            cwd,
+            env,
+            with_escalated_permissions,
+            justification,
+        }
     }
 }
 
@@ -84,6 +99,7 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
         UnifiedExecApprovalKey {
             command: req.command.clone(),
             cwd: req.cwd.clone(),
+            escalated: req.with_escalated_permissions.unwrap_or(false),
         }
     }
 
@@ -98,7 +114,10 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
         let call_id = ctx.call_id.to_string();
         let command = req.command.clone();
         let cwd = req.cwd.clone();
-        let reason = ctx.retry_reason.clone();
+        let reason = ctx
+            .retry_reason
+            .clone()
+            .or_else(|| req.justification.clone());
         let risk = ctx.risk.clone();
         Box::pin(async move {
             with_cached_approval(&session.services, key, || async move {
@@ -116,7 +135,16 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
         policy: AskForApproval,
         sandbox_policy: &SandboxPolicy,
     ) -> bool {
-        requires_initial_appoval(policy, sandbox_policy, &req.command, false)
+        requires_initial_appoval(
+            policy,
+            sandbox_policy,
+            &req.command,
+            req.with_escalated_permissions.unwrap_or(false),
+        )
+    }
+
+    fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
+        req.with_escalated_permissions.unwrap_or(false)
     }
 }
 
@@ -127,8 +155,15 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
         attempt: &SandboxAttempt<'_>,
         _ctx: &ToolCtx<'_>,
     ) -> Result<UnifiedExecSession, ToolError> {
-        let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
-            .map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
+        let spec = build_command_spec(
+            &req.command,
+            &req.cwd,
+            &req.env,
+            None,
+            req.with_escalated_permissions,
+            req.justification.clone(),
+        )
+        .map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
         let exec_env = attempt
             .env_for(&spec)
             .map_err(|err| ToolError::Codex(err.into()))?;
diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs
index cb1aeafdb..471f42c08 100644
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -177,6 +177,24 @@ fn create_exec_command_tool() -> ToolSpec {
             ),
         },
     );
+    properties.insert(
+        "with_escalated_permissions".to_string(),
+        JsonSchema::Boolean {
+            description: Some(
+                "Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions"
+                    .to_string(),
+            ),
+        },
+    );
+    properties.insert(
+        "justification".to_string(),
+        JsonSchema::String {
+            description: Some(
+                "Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command."
+                    .to_string(),
+            ),
+        },
+    );
 
     ToolSpec::Function(ResponsesApiTool {
         name: "exec_command".to_string(),
diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs
index 16fbc4c7f..b996ce79e 100644
--- a/codex-rs/core/src/unified_exec/mod.rs
+++ b/codex-rs/core/src/unified_exec/mod.rs
@@ -71,6 +71,8 @@ pub(crate) struct ExecCommandRequest<'a> {
     pub yield_time_ms: Option<u64>,
     pub max_output_tokens: Option<usize>,
     pub workdir: Option<PathBuf>,
+    pub with_escalated_permissions: Option<bool>,
+    pub justification: Option<String>,
 }
 
 #[derive(Debug)]
@@ -201,6 +203,8 @@ mod tests {
                     yield_time_ms,
                     max_output_tokens: None,
                     workdir: None,
+                    with_escalated_permissions: None,
+                    justification: None,
                 },
                 &context,
             )
diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs
index 55a5f589a..c9763fbc4 100644
--- a/codex-rs/core/src/unified_exec/session_manager.rs
+++ b/codex-rs/core/src/unified_exec/session_manager.rs
@@ -51,7 +51,13 @@ impl UnifiedExecSessionManager {
         ];
 
         let session = self
-            .open_session_with_sandbox(command, cwd.clone(), context)
+            .open_session_with_sandbox(
+                command,
+                cwd.clone(),
+                request.with_escalated_permissions,
+                request.justification,
+                context,
+            )
             .await?;
 
         let max_tokens = resolve_max_tokens(request.max_output_tokens);
@@ -317,6 +323,8 @@ impl UnifiedExecSessionManager {
         &self,
         command: Vec<String>,
         cwd: PathBuf,
+        with_escalated_permissions: Option<bool>,
+        justification: Option<String>,
         context: &UnifiedExecContext,
     ) -> Result<UnifiedExecSession, UnifiedExecError> {
         let mut orchestrator = ToolOrchestrator::new();
@@ -325,6 +333,8 @@ impl UnifiedExecSessionManager {
             command,
             cwd,
             create_env(&context.turn.shell_environment_policy),
+            with_escalated_permissions,
+            justification,
         );
         let tool_ctx = ToolCtx {
             session: context.session.as_ref(),
diff --git a/codex-rs/core/tests/suite/approvals.rs b/codex-rs/core/tests/suite/approvals.rs
index 68407367e..8316d4de5 100644
--- a/codex-rs/core/tests/suite/approvals.rs
+++ b/codex-rs/core/tests/suite/approvals.rs
@@ -75,6 +75,7 @@ enum ActionKind {
     },
     RunUnifiedExecCommand {
         command: &'static str,
+        justification: Option<&'static str>,
     },
     ApplyPatchFunction {
         target: TargetPath,
@@ -86,6 +87,9 @@ enum ActionKind {
     },
 }
 
+const DEFAULT_UNIFIED_EXEC_JUSTIFICATION: &str =
+    "Requires escalated permissions to bypass the sandbox in tests.";
+
 impl ActionKind {
     async fn prepare(
         &self,
@@ -139,8 +143,17 @@ impl ActionKind {
                 let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
                 Ok((event, Some(command)))
             }
-            ActionKind::RunUnifiedExecCommand { command } => {
-                let event = exec_command_event(call_id, command, Some(1000))?;
+            ActionKind::RunUnifiedExecCommand {
+                command,
+                justification,
+            } => {
+                let event = exec_command_event(
+                    call_id,
+                    command,
+                    Some(1000),
+                    with_escalated_permissions,
+                    *justification,
+                )?;
                 Ok((
                     event,
                     Some(vec![
@@ -199,13 +212,24 @@ fn shell_event(
     Ok(ev_function_call(call_id, "shell", &args_str))
 }
 
-fn exec_command_event(call_id: &str, cmd: &str, yield_time_ms: Option<u64>) -> Result<Value> {
+fn exec_command_event(
+    call_id: &str,
+    cmd: &str,
+    yield_time_ms: Option<u64>,
+    with_escalated_permissions: bool,
+    justification: Option<&str>,
+) -> Result<Value> {
     let mut args = json!({
         "cmd": cmd.to_string(),
     });
     if let Some(yield_time_ms) = yield_time_ms {
         args["yield_time_ms"] = json!(yield_time_ms);
     }
+    if with_escalated_permissions {
+        args["with_escalated_permissions"] = json!(true);
+        let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION);
+        args["justification"] = json!(reason);
+    }
     let args_str = serde_json::to_string(&args)?;
     Ok(ev_function_call(call_id, "exec_command", &args_str))
 }
@@ -1109,6 +1133,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
             sandbox_policy: SandboxPolicy::DangerFullAccess,
             action: ActionKind::RunUnifiedExecCommand {
                 command: "echo \"hello unified exec\"",
+                justification: None,
             },
             with_escalated_permissions: false,
             features: vec![Feature::UnifiedExec],
@@ -1118,12 +1143,34 @@ fn scenarios() -> Vec<ScenarioSpec> {
                 stdout_contains: "hello unified exec",
             },
         },
+        #[cfg(not(all(target_os = "linux", target_arch = "aarch64")))]
+        // Linux sandbox arg0 test workaround doesn't work on ARM
+        ScenarioSpec {
+            name: "unified exec on request escalated requires approval",
+            approval_policy: OnRequest,
+            sandbox_policy: SandboxPolicy::ReadOnly,
+            action: ActionKind::RunUnifiedExecCommand {
+                command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'",
+                justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
+            },
+            with_escalated_permissions: true,
+            features: vec![Feature::UnifiedExec],
+            model_override: None,
+            outcome: Outcome::ExecApproval {
+                decision: ReviewDecision::Approved,
+                expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
+            },
+            expectation: Expectation::CommandSuccess {
+                stdout_contains: "escalated unified exec",
+            },
+        },
         ScenarioSpec {
             name: "unified exec on request requires approval unless trusted",
             approval_policy: AskForApproval::UnlessTrusted,
             sandbox_policy: SandboxPolicy::DangerFullAccess,
             action: ActionKind::RunUnifiedExecCommand {
                 command: "git reset --hard",
+                justification: None,
             },
             with_escalated_permissions: false,
             features: vec![Feature::UnifiedExec],