Add unified exec escalation handling and tests (#6492)

Similar implementation to the shell tool
This commit is contained in:
pakrym-oai 2025-11-11 08:19:35 -08:00 committed by GitHub
parent ad279eacdc
commit 807e2c27f0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 208 additions and 23 deletions

10
codex-rs/Cargo.lock generated
View file

@ -4450,7 +4450,7 @@ checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
dependencies = [
"base64",
"indexmap 2.12.0",
"quick-xml",
"quick-xml 0.38.0",
"serde",
"time",
]
@ -7093,7 +7093,7 @@ version = "0.31.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c66a47e840dc20793f2264eb4b3e4ecb4b75d91c0dd4af04b456128e0bdd449d"
dependencies = [
"bitflags 2.9.1",
"bitflags 2.10.0",
"rustix 1.0.8",
"wayland-backend",
"wayland-scanner",
@ -7105,7 +7105,7 @@ version = "0.32.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efa790ed75fbfd71283bd2521a1cfdc022aabcc28bdcff00851f9e4ae88d9901"
dependencies = [
"bitflags 2.9.1",
"bitflags 2.10.0",
"wayland-backend",
"wayland-client",
"wayland-scanner",
@ -7117,7 +7117,7 @@ version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "efd94963ed43cf9938a090ca4f7da58eb55325ec8200c3848963e98dc25b78ec"
dependencies = [
"bitflags 2.9.1",
"bitflags 2.10.0",
"wayland-backend",
"wayland-client",
"wayland-protocols",
@ -7726,7 +7726,7 @@ dependencies = [
"os_pipe",
"rustix 0.38.44",
"tempfile",
"thiserror 2.0.16",
"thiserror 2.0.17",
"tree_magic_mini",
"wayland-backend",
"wayland-client",

View file

@ -2323,6 +2323,7 @@ mod tests {
use crate::tools::context::ToolOutput;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::ShellHandler;
use crate::tools::handlers::UnifiedExecHandler;
use crate::tools::registry::ToolHandler;
use crate::turn_diff_tracker::TurnDiffTracker;
use codex_app_server_protocol::AuthMode;
@ -3062,6 +3063,48 @@ mod tests {
assert!(exec_output.output.contains("hi"));
}
#[tokio::test]
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
use crate::protocol::AskForApproval;
use crate::turn_diff_tracker::TurnDiffTracker;
let (session, mut turn_context_raw) = make_session_and_context();
turn_context_raw.approval_policy = AskForApproval::OnFailure;
let session = Arc::new(session);
let turn_context = Arc::new(turn_context_raw);
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
let handler = UnifiedExecHandler;
let resp = handler
.handle(ToolInvocation {
session: Arc::clone(&session),
turn: Arc::clone(&turn_context),
tracker: Arc::clone(&tracker),
call_id: "exec-call".to_string(),
tool_name: "exec_command".to_string(),
payload: ToolPayload::Function {
arguments: serde_json::json!({
"cmd": "echo hi",
"with_escalated_permissions": true,
"justification": "need unsandboxed execution",
})
.to_string(),
},
})
.await;
let Err(FunctionCallError::RespondToModel(output)) = resp else {
panic!("expected error result");
};
let expected = format!(
"approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
policy = turn_context.approval_policy
);
pretty_assertions::assert_eq!(output, expected);
}
#[test]
fn mcp_init_error_display_prompts_for_github_pat() {
let server_name = "github";

View file

@ -36,6 +36,10 @@ struct ExecCommandArgs {
yield_time_ms: Option<u64>,
#[serde(default)]
max_output_tokens: Option<usize>,
#[serde(default)]
with_escalated_permissions: Option<bool>,
#[serde(default)]
justification: Option<String>,
}
#[derive(Debug, Deserialize)]
@ -100,8 +104,30 @@ impl ToolHandler for UnifiedExecHandler {
"failed to parse exec_command arguments: {err:?}"
))
})?;
let workdir = args
.workdir
let ExecCommandArgs {
cmd,
workdir,
shell,
login,
yield_time_ms,
max_output_tokens,
with_escalated_permissions,
justification,
} = args;
if with_escalated_permissions.unwrap_or(false)
&& !matches!(
context.turn.approval_policy,
codex_protocol::protocol::AskForApproval::OnRequest
)
{
return Err(FunctionCallError::RespondToModel(format!(
"approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
policy = context.turn.approval_policy
)));
}
let workdir = workdir
.as_deref()
.filter(|value| !value.is_empty())
.map(PathBuf::from);
@ -113,18 +139,20 @@ impl ToolHandler for UnifiedExecHandler {
&context.call_id,
None,
);
let emitter = ToolEmitter::unified_exec(args.cmd.clone(), cwd.clone(), true);
let emitter = ToolEmitter::unified_exec(cmd.clone(), cwd.clone(), true);
emitter.emit(event_ctx, ToolEventStage::Begin).await;
manager
.exec_command(
ExecCommandRequest {
command: &args.cmd,
shell: &args.shell,
login: args.login,
yield_time_ms: args.yield_time_ms,
max_output_tokens: args.max_output_tokens,
command: &cmd,
shell: &shell,
login,
yield_time_ms,
max_output_tokens,
workdir,
with_escalated_permissions,
justification,
},
&context,
)

View file

@ -34,6 +34,8 @@ pub struct UnifiedExecRequest {
pub command: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
}
impl ProvidesSandboxRetryData for UnifiedExecRequest {
@ -49,6 +51,7 @@ impl ProvidesSandboxRetryData for UnifiedExecRequest {
pub struct UnifiedExecApprovalKey {
pub command: Vec<String>,
pub cwd: PathBuf,
pub escalated: bool,
}
pub struct UnifiedExecRuntime<'a> {
@ -56,8 +59,20 @@ pub struct UnifiedExecRuntime<'a> {
}
impl UnifiedExecRequest {
pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
Self { command, cwd, env }
pub fn new(
command: Vec<String>,
cwd: PathBuf,
env: HashMap<String, String>,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
) -> Self {
Self {
command,
cwd,
env,
with_escalated_permissions,
justification,
}
}
}
@ -84,6 +99,7 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
UnifiedExecApprovalKey {
command: req.command.clone(),
cwd: req.cwd.clone(),
escalated: req.with_escalated_permissions.unwrap_or(false),
}
}
@ -98,7 +114,10 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
let call_id = ctx.call_id.to_string();
let command = req.command.clone();
let cwd = req.cwd.clone();
let reason = ctx.retry_reason.clone();
let reason = ctx
.retry_reason
.clone()
.or_else(|| req.justification.clone());
let risk = ctx.risk.clone();
Box::pin(async move {
with_cached_approval(&session.services, key, || async move {
@ -116,7 +135,16 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
policy: AskForApproval,
sandbox_policy: &SandboxPolicy,
) -> bool {
requires_initial_appoval(policy, sandbox_policy, &req.command, false)
requires_initial_appoval(
policy,
sandbox_policy,
&req.command,
req.with_escalated_permissions.unwrap_or(false),
)
}
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
req.with_escalated_permissions.unwrap_or(false)
}
}
@ -127,8 +155,15 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
attempt: &SandboxAttempt<'_>,
_ctx: &ToolCtx<'_>,
) -> Result<UnifiedExecSession, ToolError> {
let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
let spec = build_command_spec(
&req.command,
&req.cwd,
&req.env,
None,
req.with_escalated_permissions,
req.justification.clone(),
)
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
let exec_env = attempt
.env_for(&spec)
.map_err(|err| ToolError::Codex(err.into()))?;

View file

@ -177,6 +177,24 @@ fn create_exec_command_tool() -> ToolSpec {
),
},
);
properties.insert(
"with_escalated_permissions".to_string(),
JsonSchema::Boolean {
description: Some(
"Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions"
.to_string(),
),
},
);
properties.insert(
"justification".to_string(),
JsonSchema::String {
description: Some(
"Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command."
.to_string(),
),
},
);
ToolSpec::Function(ResponsesApiTool {
name: "exec_command".to_string(),

View file

@ -71,6 +71,8 @@ pub(crate) struct ExecCommandRequest<'a> {
pub yield_time_ms: Option<u64>,
pub max_output_tokens: Option<usize>,
pub workdir: Option<PathBuf>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
}
#[derive(Debug)]
@ -201,6 +203,8 @@ mod tests {
yield_time_ms,
max_output_tokens: None,
workdir: None,
with_escalated_permissions: None,
justification: None,
},
&context,
)

View file

@ -51,7 +51,13 @@ impl UnifiedExecSessionManager {
];
let session = self
.open_session_with_sandbox(command, cwd.clone(), context)
.open_session_with_sandbox(
command,
cwd.clone(),
request.with_escalated_permissions,
request.justification,
context,
)
.await?;
let max_tokens = resolve_max_tokens(request.max_output_tokens);
@ -317,6 +323,8 @@ impl UnifiedExecSessionManager {
&self,
command: Vec<String>,
cwd: PathBuf,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
context: &UnifiedExecContext,
) -> Result<UnifiedExecSession, UnifiedExecError> {
let mut orchestrator = ToolOrchestrator::new();
@ -325,6 +333,8 @@ impl UnifiedExecSessionManager {
command,
cwd,
create_env(&context.turn.shell_environment_policy),
with_escalated_permissions,
justification,
);
let tool_ctx = ToolCtx {
session: context.session.as_ref(),

View file

@ -75,6 +75,7 @@ enum ActionKind {
},
RunUnifiedExecCommand {
command: &'static str,
justification: Option<&'static str>,
},
ApplyPatchFunction {
target: TargetPath,
@ -86,6 +87,9 @@ enum ActionKind {
},
}
const DEFAULT_UNIFIED_EXEC_JUSTIFICATION: &str =
"Requires escalated permissions to bypass the sandbox in tests.";
impl ActionKind {
async fn prepare(
&self,
@ -139,8 +143,17 @@ impl ActionKind {
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
Ok((event, Some(command)))
}
ActionKind::RunUnifiedExecCommand { command } => {
let event = exec_command_event(call_id, command, Some(1000))?;
ActionKind::RunUnifiedExecCommand {
command,
justification,
} => {
let event = exec_command_event(
call_id,
command,
Some(1000),
with_escalated_permissions,
*justification,
)?;
Ok((
event,
Some(vec![
@ -199,13 +212,24 @@ fn shell_event(
Ok(ev_function_call(call_id, "shell", &args_str))
}
fn exec_command_event(call_id: &str, cmd: &str, yield_time_ms: Option<u64>) -> Result<Value> {
fn exec_command_event(
call_id: &str,
cmd: &str,
yield_time_ms: Option<u64>,
with_escalated_permissions: bool,
justification: Option<&str>,
) -> Result<Value> {
let mut args = json!({
"cmd": cmd.to_string(),
});
if let Some(yield_time_ms) = yield_time_ms {
args["yield_time_ms"] = json!(yield_time_ms);
}
if with_escalated_permissions {
args["with_escalated_permissions"] = json!(true);
let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION);
args["justification"] = json!(reason);
}
let args_str = serde_json::to_string(&args)?;
Ok(ev_function_call(call_id, "exec_command", &args_str))
}
@ -1109,6 +1133,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::RunUnifiedExecCommand {
command: "echo \"hello unified exec\"",
justification: None,
},
with_escalated_permissions: false,
features: vec![Feature::UnifiedExec],
@ -1118,12 +1143,34 @@ fn scenarios() -> Vec<ScenarioSpec> {
stdout_contains: "hello unified exec",
},
},
#[cfg(not(all(target_os = "linux", target_arch = "aarch64")))]
// Linux sandbox arg0 test workaround doesn't work on ARM
ScenarioSpec {
name: "unified exec on request escalated requires approval",
approval_policy: OnRequest,
sandbox_policy: SandboxPolicy::ReadOnly,
action: ActionKind::RunUnifiedExecCommand {
command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'",
justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
},
with_escalated_permissions: true,
features: vec![Feature::UnifiedExec],
model_override: None,
outcome: Outcome::ExecApproval {
decision: ReviewDecision::Approved,
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
},
expectation: Expectation::CommandSuccess {
stdout_contains: "escalated unified exec",
},
},
ScenarioSpec {
name: "unified exec on request requires approval unless trusted",
approval_policy: AskForApproval::UnlessTrusted,
sandbox_policy: SandboxPolicy::DangerFullAccess,
action: ActionKind::RunUnifiedExecCommand {
command: "git reset --hard",
justification: None,
},
with_escalated_permissions: false,
features: vec![Feature::UnifiedExec],