feat: update process_exec_tool_call() to take a cancellation token (#6972)

This updates `ExecParams` so that instead of taking `timeout_ms:
Option<u64>`, it now takes a more general cancellation mechanism,
`ExecExpiration`, which is an enum that includes a
`Cancellation(tokio_util::sync::CancellationToken)` variant.

If the cancellation token is fired, then `process_exec_tool_call()`
returns in the same way as if a timeout was exceeded.

This is necessary so that in #6973, we can manage the timeout logic
external to the `process_exec_tool_call()` because we want to "suspend"
the timeout when an elicitation from a human user is pending.








---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/6972).
* #7005
* #6973
* __->__ #6972
This commit is contained in:
Michael Bolin 2025-11-20 16:29:57 -08:00 committed by GitHub
parent 9be310041b
commit f56d1dc8fc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 174 additions and 68 deletions

View file

@ -1171,7 +1171,7 @@ impl CodexMessageProcessor {
let exec_params = ExecParams {
command: params.command,
cwd,
timeout_ms,
expiration: timeout_ms.into(),
env,
with_escalated_permissions: None,
justification: None,

View file

@ -3051,6 +3051,7 @@ mod tests {
let session = Arc::new(session);
let mut turn_context = Arc::new(turn_context_raw);
let timeout_ms = 1000;
let params = ExecParams {
command: if cfg!(windows) {
vec![
@ -3066,7 +3067,7 @@ mod tests {
]
},
cwd: turn_context.cwd.clone(),
timeout_ms: Some(1000),
expiration: timeout_ms.into(),
env: HashMap::new(),
with_escalated_permissions: Some(true),
justification: Some("test".to_string()),
@ -3075,7 +3076,12 @@ mod tests {
let params2 = ExecParams {
with_escalated_permissions: Some(false),
..params.clone()
command: params.command.clone(),
cwd: params.cwd.clone(),
expiration: timeout_ms.into(),
env: HashMap::new(),
justification: params.justification.clone(),
arg0: None,
};
let turn_diff_tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
@ -3095,7 +3101,7 @@ mod tests {
arguments: serde_json::json!({
"command": params.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params.timeout_ms,
"timeout_ms": params.expiration.timeout_ms(),
"with_escalated_permissions": params.with_escalated_permissions,
"justification": params.justification.clone(),
})
@ -3132,7 +3138,7 @@ mod tests {
arguments: serde_json::json!({
"command": params2.command.clone(),
"workdir": Some(turn_context.cwd.to_string_lossy().to_string()),
"timeout_ms": params2.timeout_ms,
"timeout_ms": params2.expiration.timeout_ms(),
"with_escalated_permissions": params2.with_escalated_permissions,
"justification": params2.justification.clone(),
})

View file

@ -14,6 +14,7 @@ use tokio::io::AsyncRead;
use tokio::io::AsyncReadExt;
use tokio::io::BufReader;
use tokio::process::Child;
use tokio_util::sync::CancellationToken;
use crate::error::CodexErr;
use crate::error::Result;
@ -47,20 +48,59 @@ const AGGREGATE_BUFFER_INITIAL_CAPACITY: usize = 8 * 1024; // 8 KiB
/// Aggregation still collects full output; only the live event stream is capped.
pub(crate) const MAX_EXEC_OUTPUT_DELTAS_PER_CALL: usize = 10_000;
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct ExecParams {
pub command: Vec<String>,
pub cwd: PathBuf,
pub timeout_ms: Option<u64>,
pub expiration: ExecExpiration,
pub env: HashMap<String, String>,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
pub arg0: Option<String>,
}
impl ExecParams {
pub fn timeout_duration(&self) -> Duration {
Duration::from_millis(self.timeout_ms.unwrap_or(DEFAULT_TIMEOUT_MS))
/// Mechanism to terminate an exec invocation before it finishes naturally.
#[derive(Debug)]
pub enum ExecExpiration {
Timeout(Duration),
DefaultTimeout,
Cancellation(CancellationToken),
}
impl From<Option<u64>> for ExecExpiration {
fn from(timeout_ms: Option<u64>) -> Self {
timeout_ms.map_or(ExecExpiration::DefaultTimeout, |timeout_ms| {
ExecExpiration::Timeout(Duration::from_millis(timeout_ms))
})
}
}
impl From<u64> for ExecExpiration {
fn from(timeout_ms: u64) -> Self {
ExecExpiration::Timeout(Duration::from_millis(timeout_ms))
}
}
impl ExecExpiration {
async fn wait(self) {
match self {
ExecExpiration::Timeout(duration) => tokio::time::sleep(duration).await,
ExecExpiration::DefaultTimeout => {
tokio::time::sleep(Duration::from_millis(DEFAULT_TIMEOUT_MS)).await
}
ExecExpiration::Cancellation(cancel) => {
cancel.cancelled().await;
}
}
}
/// If ExecExpiration is a timeout, returns the timeout in milliseconds.
pub(crate) fn timeout_ms(&self) -> Option<u64> {
match self {
ExecExpiration::Timeout(duration) => Some(duration.as_millis() as u64),
ExecExpiration::DefaultTimeout => Some(DEFAULT_TIMEOUT_MS),
ExecExpiration::Cancellation(_) => None,
}
}
}
@ -96,7 +136,7 @@ pub async fn process_exec_tool_call(
let ExecParams {
command,
cwd,
timeout_ms,
expiration,
env,
with_escalated_permissions,
justification,
@ -115,7 +155,7 @@ pub async fn process_exec_tool_call(
args: args.to_vec(),
cwd,
env,
timeout_ms,
expiration,
with_escalated_permissions,
justification,
};
@ -123,7 +163,7 @@ pub async fn process_exec_tool_call(
let manager = SandboxManager::new();
let exec_env = manager
.transform(
&spec,
spec,
sandbox_policy,
sandbox_type,
sandbox_cwd,
@ -132,7 +172,7 @@ pub async fn process_exec_tool_call(
.map_err(CodexErr::from)?;
// Route through the sandboxing module for a single, unified execution path.
crate::sandboxing::execute_env(&exec_env, sandbox_policy, stdout_stream).await
crate::sandboxing::execute_env(exec_env, sandbox_policy, stdout_stream).await
}
pub(crate) async fn execute_exec_env(
@ -144,7 +184,7 @@ pub(crate) async fn execute_exec_env(
command,
cwd,
env,
timeout_ms,
expiration,
sandbox,
with_escalated_permissions,
justification,
@ -154,7 +194,7 @@ pub(crate) async fn execute_exec_env(
let params = ExecParams {
command,
cwd,
timeout_ms,
expiration,
env,
with_escalated_permissions,
justification,
@ -179,9 +219,12 @@ async fn exec_windows_sandbox(
command,
cwd,
env,
timeout_ms,
expiration,
..
} = params;
// TODO(iceweasel-oai): run_windows_sandbox_capture should support all
// variants of ExecExpiration, not just timeout.
let timeout_ms = expiration.timeout_ms();
let policy_str = serde_json::to_string(sandbox_policy).map_err(|err| {
CodexErr::Io(io::Error::other(format!(
@ -449,12 +492,12 @@ async fn exec(
{
return exec_windows_sandbox(params, sandbox_policy).await;
}
let timeout = params.timeout_duration();
let ExecParams {
command,
cwd,
env,
arg0,
expiration,
..
} = params;
@ -475,14 +518,14 @@ async fn exec(
env,
)
.await?;
consume_truncated_output(child, timeout, stdout_stream).await
consume_truncated_output(child, expiration, stdout_stream).await
}
/// Consumes the output of a child process, truncating it so it is suitable for
/// use as the output of a `shell` tool call. Also enforces specified timeout.
async fn consume_truncated_output(
mut child: Child,
timeout: Duration,
expiration: ExecExpiration,
stdout_stream: Option<StdoutStream>,
) -> Result<RawExecToolCallOutput> {
// Both stdout and stderr were configured with `Stdio::piped()`
@ -516,20 +559,14 @@ async fn consume_truncated_output(
));
let (exit_status, timed_out) = tokio::select! {
result = tokio::time::timeout(timeout, child.wait()) => {
match result {
Ok(status_result) => {
let exit_status = status_result?;
(exit_status, false)
}
Err(_) => {
// timeout
kill_child_process_group(&mut child)?;
child.start_kill()?;
// Debatable whether `child.wait().await` should be called here.
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
}
}
status_result = child.wait() => {
let exit_status = status_result?;
(exit_status, false)
}
_ = expiration.wait() => {
kill_child_process_group(&mut child)?;
child.start_kill()?;
(synthetic_exit_status(EXIT_CODE_SIGNAL_BASE + TIMEOUT_CODE), true)
}
_ = tokio::signal::ctrl_c() => {
kill_child_process_group(&mut child)?;
@ -799,7 +836,7 @@ mod tests {
let params = ExecParams {
command,
cwd: std::env::current_dir()?,
timeout_ms: Some(500),
expiration: 500.into(),
env,
with_escalated_permissions: None,
justification: None,
@ -833,4 +870,62 @@ mod tests {
assert!(killed, "grandchild process with pid {pid} is still alive");
Ok(())
}
#[tokio::test]
async fn process_exec_tool_call_respects_cancellation_token() -> Result<()> {
let command = long_running_command();
let cwd = std::env::current_dir()?;
let env: HashMap<String, String> = std::env::vars().collect();
let cancel_token = CancellationToken::new();
let cancel_tx = cancel_token.clone();
let params = ExecParams {
command,
cwd: cwd.clone(),
expiration: ExecExpiration::Cancellation(cancel_token),
env,
with_escalated_permissions: None,
justification: None,
arg0: None,
};
tokio::spawn(async move {
tokio::time::sleep(Duration::from_millis(1_000)).await;
cancel_tx.cancel();
});
let result = process_exec_tool_call(
params,
SandboxType::None,
&SandboxPolicy::DangerFullAccess,
cwd.as_path(),
&None,
None,
)
.await;
let output = match result {
Err(CodexErr::Sandbox(SandboxErr::Timeout { output })) => output,
other => panic!("expected timeout error, got {other:?}"),
};
assert!(output.timed_out);
assert_eq!(output.exit_code, EXEC_TIMEOUT_EXIT_CODE);
Ok(())
}
#[cfg(unix)]
fn long_running_command() -> Vec<String> {
vec![
"/bin/sh".to_string(),
"-c".to_string(),
"sleep 30".to_string(),
]
}
#[cfg(windows)]
fn long_running_command() -> Vec<String> {
vec![
"powershell.exe".to_string(),
"-NonInteractive".to_string(),
"-NoLogo".to_string(),
"-Command".to_string(),
"Start-Sleep -Seconds 30".to_string(),
]
}
}

View file

@ -8,6 +8,7 @@ readytospawn environment.
pub mod assessment;
use crate::exec::ExecExpiration;
use crate::exec::ExecToolCallOutput;
use crate::exec::SandboxType;
use crate::exec::StdoutStream;
@ -48,23 +49,23 @@ impl From<bool> for SandboxPermissions {
}
}
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct CommandSpec {
pub program: String,
pub args: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub timeout_ms: Option<u64>,
pub expiration: ExecExpiration,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
}
#[derive(Clone, Debug)]
#[derive(Debug)]
pub struct ExecEnv {
pub command: Vec<String>,
pub cwd: PathBuf,
pub env: HashMap<String, String>,
pub timeout_ms: Option<u64>,
pub expiration: ExecExpiration,
pub sandbox: SandboxType,
pub with_escalated_permissions: Option<bool>,
pub justification: Option<String>,
@ -115,13 +116,13 @@ impl SandboxManager {
pub(crate) fn transform(
&self,
spec: &CommandSpec,
mut spec: CommandSpec,
policy: &SandboxPolicy,
sandbox: SandboxType,
sandbox_policy_cwd: &Path,
codex_linux_sandbox_exe: Option<&PathBuf>,
) -> Result<ExecEnv, SandboxTransformError> {
let mut env = spec.env.clone();
let mut env = spec.env;
if !policy.has_full_network_access() {
env.insert(
CODEX_SANDBOX_NETWORK_DISABLED_ENV_VAR.to_string(),
@ -130,8 +131,8 @@ impl SandboxManager {
}
let mut command = Vec::with_capacity(1 + spec.args.len());
command.push(spec.program.clone());
command.extend(spec.args.iter().cloned());
command.push(spec.program);
command.append(&mut spec.args);
let (command, sandbox_env, arg0_override) = match sandbox {
SandboxType::None => (command, HashMap::new(), None),
@ -176,12 +177,12 @@ impl SandboxManager {
Ok(ExecEnv {
command,
cwd: spec.cwd.clone(),
cwd: spec.cwd,
env,
timeout_ms: spec.timeout_ms,
expiration: spec.expiration,
sandbox,
with_escalated_permissions: spec.with_escalated_permissions,
justification: spec.justification.clone(),
justification: spec.justification,
arg0: arg0_override,
})
}
@ -192,9 +193,9 @@ impl SandboxManager {
}
pub async fn execute_env(
env: &ExecEnv,
env: ExecEnv,
policy: &SandboxPolicy,
stdout_stream: Option<StdoutStream>,
) -> crate::error::Result<ExecToolCallOutput> {
execute_exec_env(env.clone(), policy, stdout_stream).await
execute_exec_env(env, policy, stdout_stream).await
}

View file

@ -95,7 +95,9 @@ impl SessionTask for UserShellCommandTask {
command: command.clone(),
cwd: cwd.clone(),
env: create_env(&turn_context.shell_environment_policy),
timeout_ms: Some(USER_SHELL_TIMEOUT_MS),
// TODO(zhao-oai): Now that we have ExecExpiration::Cancellation, we
// should use that instead of an "arbitrarily large" timeout here.
expiration: USER_SHELL_TIMEOUT_MS.into(),
sandbox: SandboxType::None,
with_escalated_permissions: None,
justification: None,

View file

@ -37,7 +37,7 @@ impl ShellHandler {
ExecParams {
command: params.command,
cwd: turn_context.resolve_path(params.workdir.clone()),
timeout_ms: params.timeout_ms,
expiration: params.timeout_ms.into(),
env: create_env(&turn_context.shell_environment_policy),
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification,
@ -59,7 +59,7 @@ impl ShellCommandHandler {
ExecParams {
command,
cwd: turn_context.resolve_path(params.workdir.clone()),
timeout_ms: params.timeout_ms,
expiration: params.timeout_ms.into(),
env: create_env(&turn_context.shell_environment_policy),
with_escalated_permissions: params.with_escalated_permissions,
justification: params.justification,
@ -243,7 +243,7 @@ impl ShellHandler {
let req = ApplyPatchRequest {
patch: apply.action.patch.clone(),
cwd: apply.action.cwd.clone(),
timeout_ms: exec_params.timeout_ms,
timeout_ms: exec_params.expiration.timeout_ms(),
user_explicitly_approved: apply.user_explicitly_approved_this_action,
codex_exe: turn.codex_linux_sandbox_exe.clone(),
};
@ -300,7 +300,7 @@ impl ShellHandler {
let req = ShellRequest {
command: exec_params.command.clone(),
cwd: exec_params.cwd.clone(),
timeout_ms: exec_params.timeout_ms,
timeout_ms: exec_params.expiration.timeout_ms(),
env: exec_params.env.clone(),
with_escalated_permissions: exec_params.with_escalated_permissions,
justification: exec_params.justification.clone(),

View file

@ -67,7 +67,7 @@ impl ApplyPatchRuntime {
program,
args: vec![CODEX_APPLY_PATCH_ARG1.to_string(), req.patch.clone()],
cwd: req.cwd.clone(),
timeout_ms: req.timeout_ms,
expiration: req.timeout_ms.into(),
// Run apply_patch with a minimal environment for determinism and to avoid leaks.
env: HashMap::new(),
with_escalated_permissions: None,
@ -153,9 +153,9 @@ impl ToolRuntime<ApplyPatchRequest, ExecToolCallOutput> for ApplyPatchRuntime {
) -> Result<ExecToolCallOutput, ToolError> {
let spec = Self::build_command_spec(req)?;
let env = attempt
.env_for(&spec)
.env_for(spec)
.map_err(|err| ToolError::Codex(err.into()))?;
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
let out = execute_env(env, attempt.policy, Self::stdout_stream(ctx))
.await
.map_err(ToolError::Codex)?;
Ok(out)

View file

@ -4,6 +4,7 @@ Module: runtimes
Concrete ToolRuntime implementations for specific tools. Each runtime stays
small and focused and reuses the orchestrator for approvals + sandbox + retry.
*/
use crate::exec::ExecExpiration;
use crate::sandboxing::CommandSpec;
use crate::tools::sandboxing::ToolError;
use std::collections::HashMap;
@ -19,7 +20,7 @@ pub(crate) fn build_command_spec(
command: &[String],
cwd: &Path,
env: &HashMap<String, String>,
timeout_ms: Option<u64>,
expiration: ExecExpiration,
with_escalated_permissions: Option<bool>,
justification: Option<String>,
) -> Result<CommandSpec, ToolError> {
@ -31,7 +32,7 @@ pub(crate) fn build_command_spec(
args: args.to_vec(),
cwd: cwd.to_path_buf(),
env: env.clone(),
timeout_ms,
expiration,
with_escalated_permissions,
justification,
})

View file

@ -133,14 +133,14 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
&req.command,
&req.cwd,
&req.env,
req.timeout_ms,
req.timeout_ms.into(),
req.with_escalated_permissions,
req.justification.clone(),
)?;
let env = attempt
.env_for(&spec)
.env_for(spec)
.map_err(|err| ToolError::Codex(err.into()))?;
let out = execute_env(&env, attempt.policy, Self::stdout_stream(ctx))
let out = execute_env(env, attempt.policy, Self::stdout_stream(ctx))
.await
.map_err(ToolError::Codex)?;
Ok(out)

View file

@ -6,6 +6,7 @@ the session manager to spawn PTYs once an ExecEnv is prepared.
*/
use crate::error::CodexErr;
use crate::error::SandboxErr;
use crate::exec::ExecExpiration;
use crate::tools::runtimes::build_command_spec;
use crate::tools::sandboxing::Approvable;
use crate::tools::sandboxing::ApprovalCtx;
@ -150,13 +151,13 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
&req.command,
&req.cwd,
&req.env,
None,
ExecExpiration::DefaultTimeout,
req.with_escalated_permissions,
req.justification.clone(),
)
.map_err(|_| ToolError::Rejected("missing command line for PTY".to_string()))?;
let exec_env = attempt
.env_for(&spec)
.env_for(spec)
.map_err(|err| ToolError::Codex(err.into()))?;
self.manager
.open_session_with_exec_env(&exec_env)

View file

@ -216,7 +216,7 @@ pub(crate) struct SandboxAttempt<'a> {
impl<'a> SandboxAttempt<'a> {
pub fn env_for(
&self,
spec: &CommandSpec,
spec: CommandSpec,
) -> Result<crate::sandboxing::ExecEnv, SandboxTransformError> {
self.manager.transform(
spec,

View file

@ -32,7 +32,7 @@ async fn run_test_cmd(tmp: TempDir, cmd: Vec<&str>) -> Result<ExecToolCallOutput
let params = ExecParams {
command: cmd.iter().map(ToString::to_string).collect(),
cwd: tmp.path().to_path_buf(),
timeout_ms: Some(1000),
expiration: 1000.into(),
env: HashMap::new(),
with_escalated_permissions: None,
justification: None,

View file

@ -79,7 +79,7 @@ impl EscalateServer {
command,
],
cwd: PathBuf::from(&workdir),
timeout_ms,
expiration: timeout_ms.into(),
env,
with_escalated_permissions: None,
justification: None,

View file

@ -40,7 +40,7 @@ async fn run_cmd(cmd: &[&str], writable_roots: &[PathBuf], timeout_ms: u64) {
let params = ExecParams {
command: cmd.iter().copied().map(str::to_owned).collect(),
cwd,
timeout_ms: Some(timeout_ms),
expiration: timeout_ms.into(),
env: create_env_from_core_vars(),
with_escalated_permissions: None,
justification: None,
@ -143,7 +143,7 @@ async fn assert_network_blocked(cmd: &[&str]) {
cwd,
// Give the tool a generous 2-second timeout so even slow DNS timeouts
// do not stall the suite.
timeout_ms: Some(NETWORK_TIMEOUT_MS),
expiration: NETWORK_TIMEOUT_MS.into(),
env: create_env_from_core_vars(),
with_escalated_permissions: None,
justification: None,