fix(exec-policy) No empty command lists (#11397)
## Summary This should rarely, if ever, happen in practice. But regardless, we should never provide an empty list of `commands` to ExecPolicy. This PR is almost entirely adding test around these cases. ## Testing - [x] Adds a bunch of unit tests for this
This commit is contained in:
parent
b68a84ee8e
commit
cc8c293378
3 changed files with 352 additions and 1 deletions
|
|
@ -345,7 +345,9 @@ fn default_policy_path(codex_home: &Path) -> PathBuf {
|
|||
}
|
||||
|
||||
fn commands_for_exec_policy(command: &[String]) -> (Vec<Vec<String>>, bool) {
|
||||
if let Some(commands) = parse_shell_lc_plain_commands(command) {
|
||||
if let Some(commands) = parse_shell_lc_plain_commands(command)
|
||||
&& !commands.is_empty()
|
||||
{
|
||||
return (commands, false);
|
||||
}
|
||||
|
||||
|
|
@ -814,6 +816,24 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn commands_for_exec_policy_falls_back_for_empty_shell_script() {
|
||||
let command = vec!["bash".to_string(), "-lc".to_string(), "".to_string()];
|
||||
|
||||
assert_eq!(commands_for_exec_policy(&command), (vec![command], false));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn commands_for_exec_policy_falls_back_for_whitespace_shell_script() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
" \n\t ".to_string(),
|
||||
];
|
||||
|
||||
assert_eq!(commands_for_exec_policy(&command), (vec![command], false));
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn evaluates_heredoc_script_against_prefix_rules() {
|
||||
let policy_src = r#"prefix_rule(pattern=["python3"], decision="allow")"#;
|
||||
|
|
@ -1023,6 +1043,58 @@ prefix_rule(
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn empty_bash_lc_script_falls_back_to_original_command() {
|
||||
let command = vec!["bash".to_string(), "-lc".to_string(), "".to_string()];
|
||||
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn whitespace_bash_lc_script_falls_back_to_original_command() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
" \n\t ".to_string(),
|
||||
];
|
||||
|
||||
let manager = ExecPolicyManager::default();
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::UnlessTrusted,
|
||||
sandbox_policy: &SandboxPolicy::ReadOnly,
|
||||
sandbox_permissions: SandboxPermissions::UseDefault,
|
||||
prefix_rule: None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(command)),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn request_rule_uses_prefix_rule() {
|
||||
let command = vec![
|
||||
|
|
|
|||
|
|
@ -1,11 +1,15 @@
|
|||
#![allow(clippy::unwrap_used, clippy::expect_used)]
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::CollaborationMode;
|
||||
use codex_protocol::config_types::ModeKind;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::config_types::Settings;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
|
|
@ -16,9 +20,59 @@ use core_test_support::responses::sse;
|
|||
use core_test_support::responses::start_mock_server;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use serde_json::Value;
|
||||
use serde_json::json;
|
||||
use std::fs;
|
||||
|
||||
fn collaboration_mode_for_model(model: String) -> CollaborationMode {
|
||||
CollaborationMode {
|
||||
mode: ModeKind::Default,
|
||||
settings: Settings {
|
||||
model,
|
||||
reasoning_effort: None,
|
||||
developer_instructions: Some("exercise approvals in collaboration mode".to_string()),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
async fn submit_user_turn(
|
||||
test: &core_test_support::test_codex::TestCodex,
|
||||
prompt: &str,
|
||||
approval_policy: AskForApproval,
|
||||
sandbox_policy: SandboxPolicy,
|
||||
collaboration_mode: Option<CollaborationMode>,
|
||||
) -> Result<()> {
|
||||
let session_model = test.session_configured.model.clone();
|
||||
test.codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: prompt.into(),
|
||||
text_elements: Vec::new(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: test.cwd_path().to_path_buf(),
|
||||
approval_policy,
|
||||
sandbox_policy,
|
||||
model: session_model,
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
collaboration_mode,
|
||||
personality: None,
|
||||
})
|
||||
.await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn assert_no_matched_rules_invariant(output_item: &Value) {
|
||||
let Some(output) = output_item.get("output").and_then(Value::as_str) else {
|
||||
panic!("function_call_output should include string output payload: {output_item:?}");
|
||||
};
|
||||
assert!(
|
||||
!output.contains("invariant failed: matched_rules must be non-empty"),
|
||||
"unexpected invariant panic surfaced in output: {output}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn execpolicy_blocks_shell_invocation() -> Result<()> {
|
||||
// TODO execpolicy doesn't parse powershell commands yet
|
||||
|
|
@ -107,3 +161,213 @@ async fn execpolicy_blocks_shell_invocation() -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_command_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.features.enable(Feature::CollaborationModes);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let call_id = "shell-empty-script-collab";
|
||||
let args = json!({
|
||||
"command": "",
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-empty-shell-1"),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-empty-shell-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let results_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-empty-shell-1", "done"),
|
||||
ev_completed("resp-empty-shell-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
||||
submit_user_turn(
|
||||
&test,
|
||||
"run an empty shell command",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
Some(collaboration_mode),
|
||||
)
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
let output_item = results_mock.single_request().function_call_output(call_id);
|
||||
assert_no_matched_rules_invariant(&output_item);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_empty_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
config.features.enable(Feature::CollaborationModes);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let call_id = "unified-exec-empty-script-collab";
|
||||
let args = json!({
|
||||
"cmd": "",
|
||||
"yield_time_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-empty-unified-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-empty-unified-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let results_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-empty-unified-1", "done"),
|
||||
ev_completed("resp-empty-unified-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
||||
submit_user_turn(
|
||||
&test,
|
||||
"run empty unified exec command",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
Some(collaboration_mode),
|
||||
)
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
let output_item = results_mock.single_request().function_call_output(call_id);
|
||||
assert_no_matched_rules_invariant(&output_item);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn shell_command_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.features.enable(Feature::CollaborationModes);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let call_id = "shell-whitespace-script-collab";
|
||||
let args = json!({
|
||||
"command": " \n\t ",
|
||||
"timeout_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-whitespace-shell-1"),
|
||||
ev_function_call(call_id, "shell_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-whitespace-shell-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let results_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-whitespace-shell-1", "done"),
|
||||
ev_completed("resp-whitespace-shell-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
||||
submit_user_turn(
|
||||
&test,
|
||||
"run whitespace shell command",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
Some(collaboration_mode),
|
||||
)
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
let output_item = results_mock.single_request().function_call_output(call_id);
|
||||
assert_no_matched_rules_invariant(&output_item);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn unified_exec_whitespace_script_with_collaboration_mode_does_not_panic() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let mut builder = test_codex().with_model("gpt-5").with_config(|config| {
|
||||
config.features.enable(Feature::UnifiedExec);
|
||||
config.features.enable(Feature::CollaborationModes);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
let call_id = "unified-exec-whitespace-script-collab";
|
||||
let args = json!({
|
||||
"cmd": " \n \t",
|
||||
"yield_time_ms": 1_000,
|
||||
});
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-whitespace-unified-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-whitespace-unified-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let results_mock = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-whitespace-unified-1", "done"),
|
||||
ev_completed("resp-whitespace-unified-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let collaboration_mode = collaboration_mode_for_model(test.session_configured.model.clone());
|
||||
submit_user_turn(
|
||||
&test,
|
||||
"run whitespace unified exec command",
|
||||
AskForApproval::OnRequest,
|
||||
SandboxPolicy::DangerFullAccess,
|
||||
Some(collaboration_mode),
|
||||
)
|
||||
.await?;
|
||||
|
||||
wait_for_event(&test.codex, |event| {
|
||||
matches!(event, EventMsg::TurnComplete(_))
|
||||
})
|
||||
.await;
|
||||
|
||||
let output_item = results_mock.single_request().function_call_output(call_id);
|
||||
assert_no_matched_rules_invariant(&output_item);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -431,6 +431,21 @@ mod tests {
|
|||
assert!(parse_seq("ls &&").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_empty_command_position_with_leading_operator() {
|
||||
assert!(parse_seq("&& ls").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_empty_command_position_with_double_separator() {
|
||||
assert!(parse_seq("ls ;; pwd").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn rejects_empty_command_position_with_empty_pipeline_segment() {
|
||||
assert!(parse_seq("ls | | wc").is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_zsh_lc_plain_commands() {
|
||||
let command = vec!["zsh".to_string(), "-lc".to_string(), "ls".to_string()];
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue