feat(core) Add login to shell_command tool (#6846)
## Summary Adds the `login` parameter to the `shell_command` tool - optional, defaults to true. ## Testing - [x] Tested locally
This commit is contained in:
parent
d08efb1743
commit
a8cbbdbc6e
8 changed files with 288 additions and 2 deletions
|
|
@ -75,6 +75,7 @@ If you don’t have the tool:
|
|||
### Test assertions
|
||||
|
||||
- Tests should use pretty_assertions::assert_eq for clearer diffs. Import this at the top of the test module if it isn't already.
|
||||
- Prefer deep equals comparisons whenever possible. Perform `assert_eq!()` on entire objects, rather than individual fields.
|
||||
|
||||
### Integration tests (core)
|
||||
|
||||
|
|
|
|||
|
|
@ -408,6 +408,48 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn derive_exec_args() {
|
||||
let test_bash_shell = Shell {
|
||||
shell_type: ShellType::Bash,
|
||||
shell_path: PathBuf::from("/bin/bash"),
|
||||
};
|
||||
assert_eq!(
|
||||
test_bash_shell.derive_exec_args("echo hello", false),
|
||||
vec!["/bin/bash", "-c", "echo hello"]
|
||||
);
|
||||
assert_eq!(
|
||||
test_bash_shell.derive_exec_args("echo hello", true),
|
||||
vec!["/bin/bash", "-lc", "echo hello"]
|
||||
);
|
||||
|
||||
let test_zsh_shell = Shell {
|
||||
shell_type: ShellType::Zsh,
|
||||
shell_path: PathBuf::from("/bin/zsh"),
|
||||
};
|
||||
assert_eq!(
|
||||
test_zsh_shell.derive_exec_args("echo hello", false),
|
||||
vec!["/bin/zsh", "-c", "echo hello"]
|
||||
);
|
||||
assert_eq!(
|
||||
test_zsh_shell.derive_exec_args("echo hello", true),
|
||||
vec!["/bin/zsh", "-lc", "echo hello"]
|
||||
);
|
||||
|
||||
let test_powershell_shell = Shell {
|
||||
shell_type: ShellType::PowerShell,
|
||||
shell_path: PathBuf::from("pwsh.exe"),
|
||||
};
|
||||
assert_eq!(
|
||||
test_powershell_shell.derive_exec_args("echo hello", false),
|
||||
vec!["pwsh.exe", "-NoProfile", "-Command", "echo hello"]
|
||||
);
|
||||
assert_eq!(
|
||||
test_powershell_shell.derive_exec_args("echo hello", true),
|
||||
vec!["pwsh.exe", "-Command", "echo hello"]
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn test_current_shell_detects_zsh() {
|
||||
let shell = Command::new("sh")
|
||||
|
|
|
|||
|
|
@ -49,8 +49,7 @@ impl ShellCommandHandler {
|
|||
turn_context: &TurnContext,
|
||||
) -> ExecParams {
|
||||
let shell = session.user_shell();
|
||||
let use_login_shell = true;
|
||||
let command = shell.derive_exec_args(¶ms.command, use_login_shell);
|
||||
let command = shell.derive_exec_args(¶ms.command, params.login.unwrap_or(true));
|
||||
|
||||
ExecParams {
|
||||
command,
|
||||
|
|
@ -276,9 +275,15 @@ impl ShellHandler {
|
|||
mod tests {
|
||||
use std::path::PathBuf;
|
||||
|
||||
use codex_protocol::models::ShellCommandToolCallParams;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::exec_env::create_env;
|
||||
use crate::is_safe_command::is_known_safe_command;
|
||||
use crate::shell::Shell;
|
||||
use crate::shell::ShellType;
|
||||
use crate::tools::handlers::ShellCommandHandler;
|
||||
|
||||
/// The logic for is_known_safe_command() has heuristics for known shells,
|
||||
/// so we must ensure the commands generated by [ShellCommandHandler] can be
|
||||
|
|
@ -312,4 +317,43 @@ mod tests {
|
|||
&shell.derive_exec_args(command, /* use_login_shell */ false)
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shell_command_handler_to_exec_params_uses_session_shell_and_turn_context() {
|
||||
let (session, turn_context) = make_session_and_context();
|
||||
|
||||
let command = "echo hello".to_string();
|
||||
let workdir = Some("subdir".to_string());
|
||||
let login = None;
|
||||
let timeout_ms = Some(1234);
|
||||
let with_escalated_permissions = Some(true);
|
||||
let justification = Some("because tests".to_string());
|
||||
|
||||
let expected_command = session.user_shell().derive_exec_args(&command, true);
|
||||
let expected_cwd = turn_context.resolve_path(workdir.clone());
|
||||
let expected_env = create_env(&turn_context.shell_environment_policy);
|
||||
|
||||
let params = ShellCommandToolCallParams {
|
||||
command,
|
||||
workdir,
|
||||
login,
|
||||
timeout_ms,
|
||||
with_escalated_permissions,
|
||||
justification: justification.clone(),
|
||||
};
|
||||
|
||||
let exec_params = ShellCommandHandler::to_exec_params(params, &session, &turn_context);
|
||||
|
||||
// ExecParams cannot derive Eq due to the CancellationToken field, so we manually compare the fields.
|
||||
assert_eq!(exec_params.command, expected_command);
|
||||
assert_eq!(exec_params.cwd, expected_cwd);
|
||||
assert_eq!(exec_params.env, expected_env);
|
||||
assert_eq!(exec_params.expiration.timeout_ms(), timeout_ms);
|
||||
assert_eq!(
|
||||
exec_params.with_escalated_permissions,
|
||||
with_escalated_permissions
|
||||
);
|
||||
assert_eq!(exec_params.justification, justification);
|
||||
assert_eq!(exec_params.arg0, None);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -331,6 +331,15 @@ fn create_shell_command_tool() -> ToolSpec {
|
|||
description: Some("The working directory to execute the command in".to_string()),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"login".to_string(),
|
||||
JsonSchema::Boolean {
|
||||
description: Some(
|
||||
"Whether to run the shell with login shell semantics. Defaults to true."
|
||||
.to_string(),
|
||||
),
|
||||
},
|
||||
);
|
||||
properties.insert(
|
||||
"timeout_ms".to_string(),
|
||||
JsonSchema::Number {
|
||||
|
|
|
|||
|
|
@ -369,3 +369,15 @@ macro_rules! skip_if_no_network {
|
|||
}
|
||||
}};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! skip_if_windows {
|
||||
($return_value:expr $(,)?) => {{
|
||||
if cfg!(target_os = "windows") {
|
||||
println!(
|
||||
"Skipping test because it cannot execute when network is disabled in a Codex sandbox."
|
||||
);
|
||||
return $return_value;
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ mod review;
|
|||
mod rmcp_client;
|
||||
mod rollout_list_find;
|
||||
mod seatbelt;
|
||||
mod shell_command;
|
||||
mod shell_serialization;
|
||||
mod stream_error_allows_next_turn;
|
||||
mod stream_no_completed;
|
||||
|
|
|
|||
174
codex-rs/core/tests/suite/shell_command.rs
Normal file
174
codex-rs/core/tests/suite/shell_command.rs
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
use anyhow::Result;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::skip_if_windows;
|
||||
use core_test_support::test_codex::TestCodexBuilder;
|
||||
use core_test_support::test_codex::TestCodexHarness;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use serde_json::json;
|
||||
|
||||
fn shell_responses(call_id: &str, command: &str, login: Option<bool>) -> Vec<String> {
|
||||
let args = json!({
|
||||
"command": command,
|
||||
"timeout_ms": 2_000,
|
||||
"login": login,
|
||||
});
|
||||
|
||||
#[allow(clippy::expect_used)]
|
||||
let arguments = serde_json::to_string(&args).expect("serialize shell command arguments");
|
||||
|
||||
vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "shell_command", &arguments),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
]
|
||||
}
|
||||
|
||||
async fn shell_command_harness_with(
|
||||
configure: impl FnOnce(TestCodexBuilder) -> TestCodexBuilder,
|
||||
) -> Result<TestCodexHarness> {
|
||||
let builder = configure(test_codex()).with_config(|config| {
|
||||
config.include_apply_patch_tool = true;
|
||||
});
|
||||
TestCodexHarness::with_builder(builder).await
|
||||
}
|
||||
|
||||
async fn mount_shell_responses(
|
||||
harness: &TestCodexHarness,
|
||||
call_id: &str,
|
||||
command: &str,
|
||||
login: Option<bool>,
|
||||
) {
|
||||
mount_sse_sequence(harness.server(), shell_responses(call_id, command, login)).await;
|
||||
}
|
||||
|
||||
fn assert_shell_command_output(output: &str, expected: &str) -> Result<()> {
|
||||
let normalized_output = output
|
||||
.replace("\r\n", "\n")
|
||||
.replace('\r', "\n")
|
||||
.trim_end_matches('\n')
|
||||
.to_string();
|
||||
|
||||
let expected_pattern = format!(
|
||||
r"(?s)^Exit code: 0\nWall time: [0-9]+(?:\.[0-9]+)? seconds\nOutput:\n{expected}\n?$"
|
||||
);
|
||||
|
||||
assert_regex_match(&expected_pattern, &normalized_output);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_command_works() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let call_id = "shell-command-call";
|
||||
mount_shell_responses(&harness, call_id, "echo 'hello, world'", None).await;
|
||||
harness.submit("run the echo command").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "hello, world")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn output_with_login() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let call_id = "shell-command-call-login-true";
|
||||
mount_shell_responses(&harness, call_id, "echo 'hello, world'", Some(true)).await;
|
||||
harness.submit("run the echo command with login").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "hello, world")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn output_without_login() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let call_id = "shell-command-call-login-false";
|
||||
mount_shell_responses(&harness, call_id, "echo 'hello, world'", Some(false)).await;
|
||||
harness.submit("run the echo command without login").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "hello, world")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn multi_line_output_with_login() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let call_id = "shell-command-call-first-extra-login";
|
||||
mount_shell_responses(
|
||||
&harness,
|
||||
call_id,
|
||||
"echo 'first line\nsecond line'",
|
||||
Some(true),
|
||||
)
|
||||
.await;
|
||||
harness.submit("run the command with login").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "first line\nsecond line")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn pipe_output_with_login() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_windows!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let call_id = "shell-command-call-second-extra-no-login";
|
||||
mount_shell_responses(&harness, call_id, "echo 'hello, world' | cat", None).await;
|
||||
harness.submit("run the command without login").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "hello, world")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn pipe_output_without_login() -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_windows!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let call_id = "shell-command-call-third-extra-login-false";
|
||||
mount_shell_responses(&harness, call_id, "echo 'hello, world' | cat", Some(false)).await;
|
||||
harness.submit("run the command without login").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "hello, world")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -348,6 +348,9 @@ pub struct ShellCommandToolCallParams {
|
|||
pub command: String,
|
||||
pub workdir: Option<String>,
|
||||
|
||||
/// Whether to run the shell with login shell semantics
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub login: Option<bool>,
|
||||
/// This is the maximum time in milliseconds that the command is allowed to run.
|
||||
#[serde(alias = "timeout")]
|
||||
pub timeout_ms: Option<u64>,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue