Add unified exec escalation handling and tests (#6492)
Similar implementation to the shell tool
This commit is contained in:
parent
ad279eacdc
commit
807e2c27f0
8 changed files with 208 additions and 23 deletions
10
codex-rs/Cargo.lock
generated
10
codex-rs/Cargo.lock
generated
|
|
@ -4450,7 +4450,7 @@ checksum = "3af6b589e163c5a788fab00ce0c0366f6efbb9959c2f9874b224936af7fce7e1"
|
|||
dependencies = [
|
||||
"base64",
|
||||
"indexmap 2.12.0",
|
||||
"quick-xml",
|
||||
"quick-xml 0.38.0",
|
||||
"serde",
|
||||
"time",
|
||||
]
|
||||
|
|
@ -7093,7 +7093,7 @@ version = "0.31.11"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c66a47e840dc20793f2264eb4b3e4ecb4b75d91c0dd4af04b456128e0bdd449d"
|
||||
dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
"bitflags 2.10.0",
|
||||
"rustix 1.0.8",
|
||||
"wayland-backend",
|
||||
"wayland-scanner",
|
||||
|
|
@ -7105,7 +7105,7 @@ version = "0.32.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "efa790ed75fbfd71283bd2521a1cfdc022aabcc28bdcff00851f9e4ae88d9901"
|
||||
dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
"bitflags 2.10.0",
|
||||
"wayland-backend",
|
||||
"wayland-client",
|
||||
"wayland-scanner",
|
||||
|
|
@ -7117,7 +7117,7 @@ version = "0.3.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "efd94963ed43cf9938a090ca4f7da58eb55325ec8200c3848963e98dc25b78ec"
|
||||
dependencies = [
|
||||
"bitflags 2.9.1",
|
||||
"bitflags 2.10.0",
|
||||
"wayland-backend",
|
||||
"wayland-client",
|
||||
"wayland-protocols",
|
||||
|
|
@ -7726,7 +7726,7 @@ dependencies = [
|
|||
"os_pipe",
|
||||
"rustix 0.38.44",
|
||||
"tempfile",
|
||||
"thiserror 2.0.16",
|
||||
"thiserror 2.0.17",
|
||||
"tree_magic_mini",
|
||||
"wayland-backend",
|
||||
"wayland-client",
|
||||
|
|
|
|||
|
|
@ -2323,6 +2323,7 @@ mod tests {
|
|||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::ShellHandler;
|
||||
use crate::tools::handlers::UnifiedExecHandler;
|
||||
use crate::tools::registry::ToolHandler;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_app_server_protocol::AuthMode;
|
||||
|
|
@ -3062,6 +3063,48 @@ mod tests {
|
|||
assert!(exec_output.output.contains("hi"));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unified_exec_rejects_escalated_permissions_when_policy_not_on_request() {
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
|
||||
let (session, mut turn_context_raw) = make_session_and_context();
|
||||
turn_context_raw.approval_policy = AskForApproval::OnFailure;
|
||||
let session = Arc::new(session);
|
||||
let turn_context = Arc::new(turn_context_raw);
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
|
||||
let handler = UnifiedExecHandler;
|
||||
let resp = handler
|
||||
.handle(ToolInvocation {
|
||||
session: Arc::clone(&session),
|
||||
turn: Arc::clone(&turn_context),
|
||||
tracker: Arc::clone(&tracker),
|
||||
call_id: "exec-call".to_string(),
|
||||
tool_name: "exec_command".to_string(),
|
||||
payload: ToolPayload::Function {
|
||||
arguments: serde_json::json!({
|
||||
"cmd": "echo hi",
|
||||
"with_escalated_permissions": true,
|
||||
"justification": "need unsandboxed execution",
|
||||
})
|
||||
.to_string(),
|
||||
},
|
||||
})
|
||||
.await;
|
||||
|
||||
let Err(FunctionCallError::RespondToModel(output)) = resp else {
|
||||
panic!("expected error result");
|
||||
};
|
||||
|
||||
let expected = format!(
|
||||
"approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
|
||||
policy = turn_context.approval_policy
|
||||
);
|
||||
|
||||
pretty_assertions::assert_eq!(output, expected);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mcp_init_error_display_prompts_for_github_pat() {
|
||||
let server_name = "github";
|
||||
|
|
|
|||
|
|
@ -36,6 +36,10 @@ struct ExecCommandArgs {
|
|||
yield_time_ms: Option<u64>,
|
||||
#[serde(default)]
|
||||
max_output_tokens: Option<usize>,
|
||||
#[serde(default)]
|
||||
with_escalated_permissions: Option<bool>,
|
||||
#[serde(default)]
|
||||
justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize)]
|
||||
|
|
@ -100,8 +104,30 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
"failed to parse exec_command arguments: {err:?}"
|
||||
))
|
||||
})?;
|
||||
let workdir = args
|
||||
.workdir
|
||||
let ExecCommandArgs {
|
||||
cmd,
|
||||
workdir,
|
||||
shell,
|
||||
login,
|
||||
yield_time_ms,
|
||||
max_output_tokens,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
} = args;
|
||||
|
||||
if with_escalated_permissions.unwrap_or(false)
|
||||
&& !matches!(
|
||||
context.turn.approval_policy,
|
||||
codex_protocol::protocol::AskForApproval::OnRequest
|
||||
)
|
||||
{
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"approval policy is {policy:?}; reject command — you cannot ask for escalated permissions if the approval policy is {policy:?}",
|
||||
policy = context.turn.approval_policy
|
||||
)));
|
||||
}
|
||||
|
||||
let workdir = workdir
|
||||
.as_deref()
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(PathBuf::from);
|
||||
|
|
@ -113,18 +139,20 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
&context.call_id,
|
||||
None,
|
||||
);
|
||||
let emitter = ToolEmitter::unified_exec(args.cmd.clone(), cwd.clone(), true);
|
||||
let emitter = ToolEmitter::unified_exec(cmd.clone(), cwd.clone(), true);
|
||||
emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
|
||||
manager
|
||||
.exec_command(
|
||||
ExecCommandRequest {
|
||||
command: &args.cmd,
|
||||
shell: &args.shell,
|
||||
login: args.login,
|
||||
yield_time_ms: args.yield_time_ms,
|
||||
max_output_tokens: args.max_output_tokens,
|
||||
command: &cmd,
|
||||
shell: &shell,
|
||||
login,
|
||||
yield_time_ms,
|
||||
max_output_tokens,
|
||||
workdir,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
},
|
||||
&context,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -34,6 +34,8 @@ pub struct UnifiedExecRequest {
|
|||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub env: HashMap<String, String>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
|
|
@ -49,6 +51,7 @@ impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
|||
pub struct UnifiedExecApprovalKey {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub escalated: bool,
|
||||
}
|
||||
|
||||
pub struct UnifiedExecRuntime<'a> {
|
||||
|
|
@ -56,8 +59,20 @@ pub struct UnifiedExecRuntime<'a> {
|
|||
}
|
||||
|
||||
impl UnifiedExecRequest {
|
||||
pub fn new(command: Vec<String>, cwd: PathBuf, env: HashMap<String, String>) -> Self {
|
||||
Self { command, cwd, env }
|
||||
pub fn new(
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
env: HashMap<String, String>,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
) -> Self {
|
||||
Self {
|
||||
command,
|
||||
cwd,
|
||||
env,
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -84,6 +99,7 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
|||
UnifiedExecApprovalKey {
|
||||
command: req.command.clone(),
|
||||
cwd: req.cwd.clone(),
|
||||
escalated: req.with_escalated_permissions.unwrap_or(false),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -98,7 +114,10 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
|||
let call_id = ctx.call_id.to_string();
|
||||
let command = req.command.clone();
|
||||
let cwd = req.cwd.clone();
|
||||
let reason = ctx.retry_reason.clone();
|
||||
let reason = ctx
|
||||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let risk = ctx.risk.clone();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
|
|
@ -116,7 +135,16 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
|||
policy: AskForApproval,
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
) -> bool {
|
||||
requires_initial_appoval(policy, sandbox_policy, &req.command, false)
|
||||
requires_initial_appoval(
|
||||
policy,
|
||||
sandbox_policy,
|
||||
&req.command,
|
||||
req.with_escalated_permissions.unwrap_or(false),
|
||||
)
|
||||
}
|
||||
|
||||
fn wants_escalated_first_attempt(&self, req: &UnifiedExecRequest) -> bool {
|
||||
req.with_escalated_permissions.unwrap_or(false)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -127,8 +155,15 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
|
|||
attempt: &SandboxAttempt<'_>,
|
||||
_ctx: &ToolCtx<'_>,
|
||||
) -> Result<UnifiedExecSession, ToolError> {
|
||||
let spec = build_command_spec(&req.command, &req.cwd, &req.env, None, None, None)
|
||||
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
|
||||
let spec = build_command_spec(
|
||||
&req.command,
|
||||
&req.cwd,
|
||||
&req.env,
|
||||
None,
|
||||
req.with_escalated_permissions,
|
||||
req.justification.clone(),
|
||||
)
|
||||
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
|
||||
let exec_env = attempt
|
||||
.env_for(&spec)
|
||||
.map_err(|err| ToolError::Codex(err.into()))?;
|
||||
|
|
|
|||
|
|
@ -177,6 +177,24 @@ fn create_exec_command_tool() -> ToolSpec {
|
|||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"with_escalated_permissions".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some(
|
||||
"Whether to request escalated permissions. Set to true if command needs to be run without sandbox restrictions"
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"justification".to_string(),
|
||||
JsonSchema::String {
|
||||
description: Some(
|
||||
"Only set if with_escalated_permissions is true. 1-sentence explanation of why we want to run this command."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
|
||||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: "exec_command".to_string(),
|
||||
|
|
|
|||
|
|
@ -71,6 +71,8 @@ pub(crate) struct ExecCommandRequest<'a> {
|
|||
pub yield_time_ms: Option<u64>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub workdir: Option<PathBuf>,
|
||||
pub with_escalated_permissions: Option<bool>,
|
||||
pub justification: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
@ -201,6 +203,8 @@ mod tests {
|
|||
yield_time_ms,
|
||||
max_output_tokens: None,
|
||||
workdir: None,
|
||||
with_escalated_permissions: None,
|
||||
justification: None,
|
||||
},
|
||||
&context,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -51,7 +51,13 @@ impl UnifiedExecSessionManager {
|
|||
];
|
||||
|
||||
let session = self
|
||||
.open_session_with_sandbox(command, cwd.clone(), context)
|
||||
.open_session_with_sandbox(
|
||||
command,
|
||||
cwd.clone(),
|
||||
request.with_escalated_permissions,
|
||||
request.justification,
|
||||
context,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let max_tokens = resolve_max_tokens(request.max_output_tokens);
|
||||
|
|
@ -317,6 +323,8 @@ impl UnifiedExecSessionManager {
|
|||
&self,
|
||||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
with_escalated_permissions: Option<bool>,
|
||||
justification: Option<String>,
|
||||
context: &UnifiedExecContext,
|
||||
) -> Result<UnifiedExecSession, UnifiedExecError> {
|
||||
let mut orchestrator = ToolOrchestrator::new();
|
||||
|
|
@ -325,6 +333,8 @@ impl UnifiedExecSessionManager {
|
|||
command,
|
||||
cwd,
|
||||
create_env(&context.turn.shell_environment_policy),
|
||||
with_escalated_permissions,
|
||||
justification,
|
||||
);
|
||||
let tool_ctx = ToolCtx {
|
||||
session: context.session.as_ref(),
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ enum ActionKind {
|
|||
},
|
||||
RunUnifiedExecCommand {
|
||||
command: &'static str,
|
||||
justification: Option<&'static str>,
|
||||
},
|
||||
ApplyPatchFunction {
|
||||
target: TargetPath,
|
||||
|
|
@ -86,6 +87,9 @@ enum ActionKind {
|
|||
},
|
||||
}
|
||||
|
||||
const DEFAULT_UNIFIED_EXEC_JUSTIFICATION: &str =
|
||||
"Requires escalated permissions to bypass the sandbox in tests.";
|
||||
|
||||
impl ActionKind {
|
||||
async fn prepare(
|
||||
&self,
|
||||
|
|
@ -139,8 +143,17 @@ impl ActionKind {
|
|||
let event = shell_event(call_id, &command, 1_000, with_escalated_permissions)?;
|
||||
Ok((event, Some(command)))
|
||||
}
|
||||
ActionKind::RunUnifiedExecCommand { command } => {
|
||||
let event = exec_command_event(call_id, command, Some(1000))?;
|
||||
ActionKind::RunUnifiedExecCommand {
|
||||
command,
|
||||
justification,
|
||||
} => {
|
||||
let event = exec_command_event(
|
||||
call_id,
|
||||
command,
|
||||
Some(1000),
|
||||
with_escalated_permissions,
|
||||
*justification,
|
||||
)?;
|
||||
Ok((
|
||||
event,
|
||||
Some(vec![
|
||||
|
|
@ -199,13 +212,24 @@ fn shell_event(
|
|||
Ok(ev_function_call(call_id, "shell", &args_str))
|
||||
}
|
||||
|
||||
fn exec_command_event(call_id: &str, cmd: &str, yield_time_ms: Option<u64>) -> Result<Value> {
|
||||
fn exec_command_event(
|
||||
call_id: &str,
|
||||
cmd: &str,
|
||||
yield_time_ms: Option<u64>,
|
||||
with_escalated_permissions: bool,
|
||||
justification: Option<&str>,
|
||||
) -> Result<Value> {
|
||||
let mut args = json!({
|
||||
"cmd": cmd.to_string(),
|
||||
});
|
||||
if let Some(yield_time_ms) = yield_time_ms {
|
||||
args["yield_time_ms"] = json!(yield_time_ms);
|
||||
}
|
||||
if with_escalated_permissions {
|
||||
args["with_escalated_permissions"] = json!(true);
|
||||
let reason = justification.unwrap_or(DEFAULT_UNIFIED_EXEC_JUSTIFICATION);
|
||||
args["justification"] = json!(reason);
|
||||
}
|
||||
let args_str = serde_json::to_string(&args)?;
|
||||
Ok(ev_function_call(call_id, "exec_command", &args_str))
|
||||
}
|
||||
|
|
@ -1109,6 +1133,7 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
|||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::RunUnifiedExecCommand {
|
||||
command: "echo \"hello unified exec\"",
|
||||
justification: None,
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
|
|
@ -1118,12 +1143,34 @@ fn scenarios() -> Vec<ScenarioSpec> {
|
|||
stdout_contains: "hello unified exec",
|
||||
},
|
||||
},
|
||||
#[cfg(not(all(target_os = "linux", target_arch = "aarch64")))]
|
||||
// Linux sandbox arg0 test workaround doesn't work on ARM
|
||||
ScenarioSpec {
|
||||
name: "unified exec on request escalated requires approval",
|
||||
approval_policy: OnRequest,
|
||||
sandbox_policy: SandboxPolicy::ReadOnly,
|
||||
action: ActionKind::RunUnifiedExecCommand {
|
||||
command: "python3 -c 'print('\"'\"'escalated unified exec'\"'\"')'",
|
||||
justification: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
||||
},
|
||||
with_escalated_permissions: true,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
model_override: None,
|
||||
outcome: Outcome::ExecApproval {
|
||||
decision: ReviewDecision::Approved,
|
||||
expected_reason: Some(DEFAULT_UNIFIED_EXEC_JUSTIFICATION),
|
||||
},
|
||||
expectation: Expectation::CommandSuccess {
|
||||
stdout_contains: "escalated unified exec",
|
||||
},
|
||||
},
|
||||
ScenarioSpec {
|
||||
name: "unified exec on request requires approval unless trusted",
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
action: ActionKind::RunUnifiedExecCommand {
|
||||
command: "git reset --hard",
|
||||
justification: None,
|
||||
},
|
||||
with_escalated_permissions: false,
|
||||
features: vec![Feature::UnifiedExec],
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue