feat: add shell snapshot for shell command (#7786)
This commit is contained in:
parent
b2280d6205
commit
29381ba5c2
14 changed files with 301 additions and 129 deletions
8
codex-rs/Cargo.lock
generated
8
codex-rs/Cargo.lock
generated
|
|
@ -2601,7 +2601,7 @@ checksum = "0ce92ff622d6dadf7349484f42c93271a0d49b7cc4d466a936405bacbe10aa78"
|
|||
dependencies = [
|
||||
"cfg-if",
|
||||
"rustix 1.0.8",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -3505,7 +3505,7 @@ checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
|
|||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -5294,7 +5294,7 @@ dependencies = [
|
|||
"errno",
|
||||
"libc",
|
||||
"linux-raw-sys 0.4.15",
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
@ -7475,7 +7475,7 @@ version = "0.1.9"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
|
||||
dependencies = [
|
||||
"windows-sys 0.52.0",
|
||||
"windows-sys 0.59.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ pub use auth_fixtures::write_chatgpt_auth;
|
|||
use codex_app_server_protocol::JSONRPCResponse;
|
||||
pub use core_test_support::format_with_current_shell;
|
||||
pub use core_test_support::format_with_current_shell_display;
|
||||
pub use core_test_support::format_with_current_shell_display_non_login;
|
||||
pub use core_test_support::format_with_current_shell_non_login;
|
||||
pub use mcp_process::McpProcess;
|
||||
pub use mock_model_server::create_mock_chat_completions_server;
|
||||
pub use mock_model_server::create_mock_chat_completions_server_unchecked;
|
||||
|
|
|
|||
|
|
@ -63,33 +63,6 @@ impl Shell {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn wrap_command_with_snapshot(&self, command: &[String]) -> Vec<String> {
|
||||
let Some(snapshot) = &self.shell_snapshot else {
|
||||
return command.to_vec();
|
||||
};
|
||||
|
||||
if command.is_empty() {
|
||||
return command.to_vec();
|
||||
}
|
||||
|
||||
match self.shell_type {
|
||||
ShellType::Zsh | ShellType::Bash | ShellType::Sh => {
|
||||
let mut args = self.derive_exec_args(". \"$0\" && exec \"$@\"", false);
|
||||
args.push(snapshot.path.to_string_lossy().to_string());
|
||||
args.extend_from_slice(command);
|
||||
args
|
||||
}
|
||||
ShellType::PowerShell => {
|
||||
let mut args =
|
||||
self.derive_exec_args("param($snapshot) . $snapshot; & @args", false);
|
||||
args.push(snapshot.path.to_string_lossy().to_string());
|
||||
args.extend_from_slice(command);
|
||||
args
|
||||
}
|
||||
ShellType::Cmd => command.to_vec(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
|
|
|
|||
|
|
@ -257,7 +257,7 @@ mod tests {
|
|||
use std::os::unix::fs::PermissionsExt;
|
||||
#[cfg(target_os = "linux")]
|
||||
use std::process::Command as StdCommand;
|
||||
use std::sync::Arc;
|
||||
|
||||
use tempfile::tempdir;
|
||||
|
||||
#[cfg(not(target_os = "windows"))]
|
||||
|
|
@ -293,53 +293,6 @@ mod tests {
|
|||
assert!(result.is_err());
|
||||
}
|
||||
|
||||
/// A Bash shell carrying a snapshot must prepend the snapshot-sourcing wrapper
/// and the snapshot path to the original command.
#[cfg(unix)]
#[test]
fn wrap_command_with_snapshot_wraps_bash_shell() {
    let snapshot_path = PathBuf::from("/tmp/snapshot.sh");
    let original_command: Vec<String> = ["bash", "-lc", "echo hello"]
        .iter()
        .map(|part| part.to_string())
        .collect();
    let shell = Shell {
        shell_type: ShellType::Bash,
        shell_path: PathBuf::from("/bin/bash"),
        shell_snapshot: Some(Arc::new(ShellSnapshot {
            path: snapshot_path.clone(),
        })),
    };

    let wrapped = shell.wrap_command_with_snapshot(&original_command);

    // Expected argv: wrapper invocation, then the snapshot path, then the untouched command.
    let expected: Vec<String> = shell
        .derive_exec_args(". \"$0\" && exec \"$@\"", false)
        .into_iter()
        .chain(std::iter::once(snapshot_path.to_string_lossy().to_string()))
        .chain(original_command.iter().cloned())
        .collect();

    assert_eq!(wrapped, expected);
}
|
||||
|
||||
/// A `Cmd` shell is never wrapped, even when a snapshot is attached.
#[test]
fn wrap_command_with_snapshot_preserves_cmd_shell() {
    let original_command: Vec<String> = ["cmd", "/c", "echo hello"]
        .iter()
        .map(|part| part.to_string())
        .collect();
    let shell = Shell {
        shell_type: ShellType::Cmd,
        shell_path: PathBuf::from("cmd"),
        shell_snapshot: Some(Arc::new(ShellSnapshot {
            path: PathBuf::from("C:\\snapshot.cmd"),
        })),
    };

    let wrapped = shell.wrap_command_with_snapshot(&original_command);

    assert_eq!(wrapped, original_command);
}
|
||||
|
||||
#[cfg(unix)]
|
||||
#[tokio::test]
|
||||
async fn try_new_creates_and_deletes_snapshot_file() -> Result<()> {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ use crate::exec_policy::create_exec_approval_requirement_for_command;
|
|||
use crate::function_tool::FunctionCallError;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
use crate::shell::Shell;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolOutput;
|
||||
use crate::tools::context::ToolPayload;
|
||||
|
|
@ -42,13 +43,18 @@ impl ShellHandler {
|
|||
}
|
||||
|
||||
impl ShellCommandHandler {
|
||||
fn base_command(shell: &Shell, command: &str, login: Option<bool>) -> Vec<String> {
|
||||
let use_login_shell = login.unwrap_or(true);
|
||||
shell.derive_exec_args(command, use_login_shell)
|
||||
}
|
||||
|
||||
fn to_exec_params(
|
||||
params: ShellCommandToolCallParams,
|
||||
session: &crate::codex::Session,
|
||||
turn_context: &TurnContext,
|
||||
) -> ExecParams {
|
||||
let shell = session.user_shell();
|
||||
let command = shell.derive_exec_args(&params.command, params.login.unwrap_or(true));
|
||||
let command = Self::base_command(shell.as_ref(), &params.command, params.login);
|
||||
|
||||
ExecParams {
|
||||
command,
|
||||
|
|
@ -155,7 +161,7 @@ impl ToolHandler for ShellCommandHandler {
|
|||
serde_json::from_str::<ShellCommandToolCallParams>(arguments)
|
||||
.map(|params| {
|
||||
let shell = invocation.session.user_shell();
|
||||
let command = shell.derive_exec_args(&params.command, params.login.unwrap_or(true));
|
||||
let command = Self::base_command(shell.as_ref(), &params.command, params.login);
|
||||
!is_known_safe_command(&command)
|
||||
})
|
||||
.unwrap_or(true)
|
||||
|
|
@ -289,6 +295,7 @@ impl ShellHandler {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
|
||||
use codex_protocol::models::ShellCommandToolCallParams;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
|
@ -299,6 +306,7 @@ mod tests {
|
|||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::shell::Shell;
|
||||
use crate::shell::ShellType;
|
||||
use crate::shell_snapshot::ShellSnapshot;
|
||||
use crate::tools::handlers::ShellCommandHandler;
|
||||
|
||||
/// The logic for is_known_safe_command() has heuristics for known shells,
|
||||
|
|
@ -372,4 +380,29 @@ mod tests {
|
|||
assert_eq!(exec_params.justification, justification);
|
||||
assert_eq!(exec_params.arg0, None);
|
||||
}
|
||||
|
||||
/// An explicit `login` value passed to `base_command` must reach
/// `derive_exec_args` unchanged, even when the shell carries a snapshot.
#[test]
fn shell_command_handler_respects_explicit_login_flag() {
    let snapshot = ShellSnapshot {
        path: PathBuf::from("/tmp/snapshot.sh"),
    };
    let shell = Shell {
        shell_type: ShellType::Bash,
        shell_path: PathBuf::from("/bin/bash"),
        shell_snapshot: Some(Arc::new(snapshot)),
    };

    // (script, explicit login flag): login case first, then non-login.
    let cases = [("echo login shell", true), ("echo non login shell", false)];
    for (script, login) in cases {
        let actual = ShellCommandHandler::base_command(&shell, script, Some(login));
        assert_eq!(actual, shell.derive_exec_args(script, login));
    }
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,8 +34,8 @@ struct ExecCommandArgs {
|
|||
workdir: Option<String>,
|
||||
#[serde(default)]
|
||||
shell: Option<String>,
|
||||
#[serde(default)]
|
||||
login: Option<bool>,
|
||||
#[serde(default = "default_login")]
|
||||
login: bool,
|
||||
#[serde(default = "default_exec_yield_time_ms")]
|
||||
yield_time_ms: u64,
|
||||
#[serde(default)]
|
||||
|
|
@ -66,6 +66,10 @@ fn default_write_stdin_yield_time_ms() -> u64 {
|
|||
250
|
||||
}
|
||||
|
||||
/// Serde default for the `login` field of `ExecCommandArgs`, used when the
/// caller omits it.
fn default_login() -> bool {
    const LOGIN_WHEN_OMITTED: bool = true;
    LOGIN_WHEN_OMITTED
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for UnifiedExecHandler {
|
||||
fn kind(&self) -> ToolKind {
|
||||
|
|
@ -125,11 +129,10 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
))
|
||||
})?;
|
||||
let process_id = manager.allocate_process_id().await;
|
||||
let command = get_command(&args, session.user_shell());
|
||||
|
||||
let command_for_intercept = get_command(&args, session.user_shell());
|
||||
let ExecCommandArgs {
|
||||
workdir,
|
||||
login,
|
||||
yield_time_ms,
|
||||
max_output_tokens,
|
||||
sandbox_permissions,
|
||||
|
|
@ -156,7 +159,7 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
let cwd = workdir.clone().unwrap_or_else(|| context.turn.cwd.clone());
|
||||
|
||||
if let Some(output) = intercept_apply_patch(
|
||||
&command_for_intercept,
|
||||
&command,
|
||||
&cwd,
|
||||
Some(yield_time_ms),
|
||||
context.session.as_ref(),
|
||||
|
|
@ -177,14 +180,6 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
&context.call_id,
|
||||
None,
|
||||
);
|
||||
let command = if login.is_none() {
|
||||
context
|
||||
.session
|
||||
.user_shell()
|
||||
.wrap_command_with_snapshot(&command_for_intercept)
|
||||
} else {
|
||||
command_for_intercept
|
||||
};
|
||||
let emitter = ToolEmitter::unified_exec(
|
||||
&command,
|
||||
cwd.clone(),
|
||||
|
|
@ -258,14 +253,15 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
}
|
||||
|
||||
fn get_command(args: &ExecCommandArgs, session_shell: Arc<Shell>) -> Vec<String> {
|
||||
if let Some(shell_str) = &args.shell {
|
||||
let model_shell = args.shell.as_ref().map(|shell_str| {
|
||||
let mut shell = get_shell_by_model_provided_path(&PathBuf::from(shell_str));
|
||||
shell.shell_snapshot = None;
|
||||
return shell.derive_exec_args(&args.cmd, args.login.unwrap_or(true));
|
||||
}
|
||||
shell
|
||||
});
|
||||
|
||||
let use_login_shell = args.login.unwrap_or(session_shell.shell_snapshot.is_none());
|
||||
session_shell.derive_exec_args(&args.cmd, use_login_shell)
|
||||
let shell = model_shell.as_ref().unwrap_or(session_shell.as_ref());
|
||||
|
||||
shell.derive_exec_args(&args.cmd, args.login)
|
||||
}
|
||||
|
||||
fn format_response(response: &UnifiedExecResponse) -> String {
|
||||
|
|
@ -329,7 +325,13 @@ mod tests {
|
|||
|
||||
let command = get_command(&args, Arc::new(default_user_shell()));
|
||||
|
||||
assert_eq!(command[2], "echo hello");
|
||||
assert_eq!(command.last(), Some(&"echo hello".to_string()));
|
||||
if command
|
||||
.iter()
|
||||
.any(|arg| arg.eq_ignore_ascii_case("-Command"))
|
||||
{
|
||||
assert!(command.contains(&"-NoProfile".to_string()));
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ small and focused and reuses the orchestrator for approvals + sandbox + retry.
|
|||
use crate::exec::ExecExpiration;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::shell::Shell;
|
||||
use crate::tools::sandboxing::ToolError;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
|
@ -38,3 +39,39 @@ pub(crate) fn build_command_spec(
|
|||
justification,
|
||||
})
|
||||
}
|
||||
|
||||
/// POSIX-only helper: for commands produced by `Shell::derive_exec_args`
|
||||
/// for Bash/Zsh/sh of the form `[shell_path, "-lc", "<script>"]`, and
|
||||
/// when a snapshot is configured on the session shell, rewrite the argv
|
||||
/// to a single non-login shell that sources the snapshot before running
|
||||
/// the original script:
|
||||
///
|
||||
/// shell -lc "<script>"
|
||||
/// => shell -c ". SNAPSHOT && <script>"
|
||||
///
|
||||
/// On non-POSIX shells or non-matching commands this is a no-op.
|
||||
pub(crate) fn maybe_wrap_shell_lc_with_snapshot(
|
||||
command: &[String],
|
||||
session_shell: &Shell,
|
||||
) -> Vec<String> {
|
||||
let Some(snapshot) = &session_shell.shell_snapshot else {
|
||||
return command.to_vec();
|
||||
};
|
||||
|
||||
if command.len() < 3 {
|
||||
return command.to_vec();
|
||||
}
|
||||
|
||||
let flag = command[1].as_str();
|
||||
if flag != "-lc" {
|
||||
return command.to_vec();
|
||||
}
|
||||
|
||||
let snapshot_path = snapshot.path.to_string_lossy();
|
||||
let rewritten_script = format!(". \"{snapshot_path}\" && {}", command[2]);
|
||||
|
||||
let mut rewritten = command.to_vec();
|
||||
rewritten[1] = "-c".to_string();
|
||||
rewritten[2] = rewritten_script;
|
||||
rewritten
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ use crate::exec::ExecToolCallOutput;
|
|||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ExecApprovalRequirement;
|
||||
|
|
@ -140,8 +141,12 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
|||
attempt: &SandboxAttempt<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<ExecToolCallOutput, ToolError> {
|
||||
let base_command = &req.command;
|
||||
let session_shell = ctx.session.user_shell();
|
||||
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
|
||||
|
||||
let spec = build_command_spec(
|
||||
&req.command,
|
||||
&command,
|
||||
&req.cwd,
|
||||
&req.env,
|
||||
req.timeout_ms.into(),
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ use crate::error::SandboxErr;
|
|||
use crate::exec::ExecExpiration;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ExecApprovalRequirement;
|
||||
|
|
@ -159,10 +160,14 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
|
|||
&mut self,
|
||||
req: &UnifiedExecRequest,
|
||||
attempt: &SandboxAttempt<'_>,
|
||||
_ctx: &ToolCtx<'_>,
|
||||
ctx: &ToolCtx<'_>,
|
||||
) -> Result<UnifiedExecSession, ToolError> {
|
||||
let base_command = &req.command;
|
||||
let session_shell = ctx.session.user_shell();
|
||||
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
|
||||
|
||||
let spec = build_command_spec(
|
||||
&req.command,
|
||||
&command,
|
||||
&req.cwd,
|
||||
&req.env,
|
||||
ExecExpiration::DefaultTimeout,
|
||||
|
|
|
|||
|
|
@ -153,7 +153,8 @@ fn create_exec_command_tool() -> ToolSpec {
|
|||
"login".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some(
|
||||
"Whether to run the shell with -l/-i semantics. Defaults to true.".to_string(),
|
||||
"Whether to run the shell with -l/-i semantics. Defaults to false unless a shell snapshot is available."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
|
|
@ -335,7 +336,7 @@ fn create_shell_command_tool() -> ToolSpec {
|
|||
"login".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some(
|
||||
"Whether to run the shell with login shell semantics. Defaults to true."
|
||||
"Whether to run the shell with login shell semantics. Defaults to false unless a shell snapshot is available."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
|
|
|
|||
|
|
@ -181,6 +181,16 @@ pub fn format_with_current_shell_display(command: &str) -> String {
|
|||
shlex::try_join(args.iter().map(String::as_str)).expect("serialize current shell command")
|
||||
}
|
||||
|
||||
pub fn format_with_current_shell_non_login(command: &str) -> Vec<String> {
|
||||
codex_core::shell::default_user_shell().derive_exec_args(command, false)
|
||||
}
|
||||
|
||||
pub fn format_with_current_shell_display_non_login(command: &str) -> String {
|
||||
let args = format_with_current_shell_non_login(command);
|
||||
shlex::try_join(args.iter().map(String::as_str))
|
||||
.expect("serialize current shell command without login")
|
||||
}
|
||||
|
||||
pub mod fs_wait {
|
||||
use anyhow::Result;
|
||||
use anyhow::anyhow;
|
||||
|
|
|
|||
|
|
@ -85,14 +85,88 @@ async fn run_snapshot_command(command: &str) -> Result<SnapshotRun> {
|
|||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
|
||||
let snapshot_path = entries
|
||||
.next_entry()
|
||||
.await?
|
||||
.map(|entry| entry.path())
|
||||
.expect("shell snapshot created");
|
||||
let snapshot_content = fs::read_to_string(&snapshot_path).await?;
|
||||
|
||||
let snapshot_arg = begin
|
||||
.command
|
||||
.iter()
|
||||
.find(|arg| arg.contains("shell_snapshots"))
|
||||
.expect("command includes shell snapshot path")
|
||||
.to_owned();
|
||||
let snapshot_path = PathBuf::from(&snapshot_arg);
|
||||
let end = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ExecCommandEnd(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
Ok(SnapshotRun {
|
||||
begin,
|
||||
end,
|
||||
snapshot_path,
|
||||
snapshot_content,
|
||||
codex_home,
|
||||
})
|
||||
}
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
async fn run_shell_command_snapshot(command: &str) -> Result<SnapshotRun> {
|
||||
let builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ShellSnapshot);
|
||||
});
|
||||
let harness = TestCodexHarness::with_builder(builder).await?;
|
||||
let args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 1000,
|
||||
});
|
||||
let call_id = "shell-snapshot-command";
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(harness.server(), responses).await;
|
||||
|
||||
let test = harness.test();
|
||||
let codex = test.codex.clone();
|
||||
let codex_home = test.home.path().to_path_buf();
|
||||
let session_model = test.session_configured.model.clone();
|
||||
let cwd = test.cwd_path().to_path_buf();
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "run shell_command with shell snapshot".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd,
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let begin = wait_for_event_match(&codex, |ev| match ev {
|
||||
EventMsg::ExecCommandBegin(ev) if ev.call_id == call_id => Some(ev.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
|
||||
let snapshot_path = entries
|
||||
.next_entry()
|
||||
.await?
|
||||
.map(|entry| entry.path())
|
||||
.expect("shell snapshot created");
|
||||
let snapshot_content = fs::read_to_string(&snapshot_path).await?;
|
||||
|
||||
let end = wait_for_event_match(&codex, |ev| match ev {
|
||||
|
|
@ -134,21 +208,9 @@ async fn linux_unified_exec_uses_shell_snapshot() -> Result<()> {
|
|||
let run = run_snapshot_command(command).await?;
|
||||
let stdout = normalize_newlines(&run.end.stdout);
|
||||
|
||||
let shell_path = run
|
||||
.begin
|
||||
.command
|
||||
.first()
|
||||
.expect("shell path recorded")
|
||||
.clone();
|
||||
assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-c"));
|
||||
assert_eq!(
|
||||
run.begin.command.get(2).map(String::as_str),
|
||||
Some(". \"$0\" && exec \"$@\"")
|
||||
);
|
||||
assert_eq!(run.begin.command.get(4), Some(&shell_path));
|
||||
assert_eq!(run.begin.command.get(5).map(String::as_str), Some("-c"));
|
||||
assert_eq!(run.begin.command.last(), Some(&command.to_string()));
|
||||
|
||||
assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-lc"));
|
||||
assert_eq!(run.begin.command.get(2).map(String::as_str), Some(command));
|
||||
assert_eq!(run.begin.command.len(), 3);
|
||||
assert!(run.snapshot_path.starts_with(&run.codex_home));
|
||||
assert_posix_snapshot_sections(&run.snapshot_content);
|
||||
assert_eq!(run.end.exit_code, 0);
|
||||
|
|
@ -160,6 +222,93 @@ async fn linux_unified_exec_uses_shell_snapshot() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn linux_shell_command_uses_shell_snapshot() -> Result<()> {
|
||||
let command = "echo shell-command-snapshot-linux";
|
||||
let run = run_shell_command_snapshot(command).await?;
|
||||
|
||||
assert_eq!(run.begin.command.get(1).map(String::as_str), Some("-lc"));
|
||||
assert_eq!(run.begin.command.get(2).map(String::as_str), Some(command));
|
||||
assert_eq!(run.begin.command.len(), 3);
|
||||
assert!(run.snapshot_path.starts_with(&run.codex_home));
|
||||
assert_posix_snapshot_sections(&run.snapshot_content);
|
||||
assert_eq!(
|
||||
normalize_newlines(&run.end.stdout).trim(),
|
||||
"shell-command-snapshot-linux"
|
||||
);
|
||||
assert_eq!(run.end.exit_code, 0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(target_os = "windows", ignore)]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_command_snapshot_still_intercepts_apply_patch() -> Result<()> {
|
||||
let builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::ShellSnapshot);
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
let harness = TestCodexHarness::with_builder(builder).await?;
|
||||
|
||||
let test = harness.test();
|
||||
let codex = test.codex.clone();
|
||||
let cwd = test.cwd_path().to_path_buf();
|
||||
let codex_home = test.home.path().to_path_buf();
|
||||
let target = cwd.join("snapshot-apply.txt");
|
||||
|
||||
let script = "apply_patch <<'EOF'\n*** Begin Patch\n*** Add File: snapshot-apply.txt\n+hello from snapshot\n*** End Patch\nEOF\n";
|
||||
let args = json!({
|
||||
"command": script,
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
let call_id = "shell-snapshot-apply-patch";
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(harness.server(), responses).await;
|
||||
|
||||
let model = test.session_configured.model.clone();
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "apply patch via shell_command with snapshot".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.clone(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
assert_eq!(fs::read_to_string(&target).await?, "hello from snapshot\n");
|
||||
|
||||
let mut entries = fs::read_dir(codex_home.join("shell_snapshots")).await?;
|
||||
let snapshot_path = entries
|
||||
.next_entry()
|
||||
.await?
|
||||
.map(|entry| entry.path())
|
||||
.expect("shell snapshot created");
|
||||
let snapshot_content = fs::read_to_string(&snapshot_path).await?;
|
||||
assert_posix_snapshot_sections(&snapshot_content);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(not(target_os = "macos"), ignore)]
|
||||
#[cfg_attr(
|
||||
target_os = "macos",
|
||||
|
|
|
|||
|
|
@ -2164,14 +2164,14 @@ async fn unified_exec_python_prompt_under_seatbelt() -> Result<()> {
|
|||
let startup_call_id = "uexec-python-seatbelt";
|
||||
let startup_args = serde_json::json!({
|
||||
"cmd": format!("{} -i", python.display()),
|
||||
"yield_time_ms": 750,
|
||||
"yield_time_ms": 1_500,
|
||||
});
|
||||
|
||||
let exit_call_id = "uexec-python-exit";
|
||||
let exit_args = serde_json::json!({
|
||||
"chars": "exit()\n",
|
||||
"session_id": 1000,
|
||||
"yield_time_ms": 750,
|
||||
"yield_time_ms": 1_500,
|
||||
});
|
||||
|
||||
let responses = vec![
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ mod mock_model_server;
|
|||
mod responses;
|
||||
|
||||
pub use core_test_support::format_with_current_shell;
|
||||
pub use core_test_support::format_with_current_shell_display_non_login;
|
||||
pub use core_test_support::format_with_current_shell_non_login;
|
||||
pub use mcp_process::McpProcess;
|
||||
use mcp_types::JSONRPCResponse;
|
||||
pub use mock_model_server::create_mock_chat_completions_server;
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue