From be5bca6f8d5864d4e82e685cd213649799f66ce0 Mon Sep 17 00:00:00 2001 From: Michael Bolin Date: Wed, 25 Feb 2026 12:23:30 -0800 Subject: [PATCH] fix: harden zsh fork tests and keep subcommand approvals deterministic (#12809) ## Why The prior `turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2` assertion was brittle under Bazel: command approval payloads in the test could include environment-dependent wrapper/command formatting differences, which makes exact command-string matching flaky even when behavior is correct. (This regression was knowingly introduced in https://github.com/openai/codex/pull/12800, but it was urgent to land that PR.) ## What changed - Hardened `turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2` in [`turn_start_zsh_fork.rs`](https://github.com/openai/codex/blob/main/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs): - Replaced strict `approval_command.starts_with("/bin/rm")` checks with intent-based subcommand matching. - Subcommand approvals are now recognized by file-target semantics (`first.txt` or `second.txt`) plus `rm` intent. - Parent approval recognition is now more tolerant of command-format differences while still requiring a definitive parent command context. - Uses a defensive loop that waits for all target subcommand decisions and the parent approval request. - Preserved the existing regression and unit test fixes from earlier commits in `unix_escalation.rs` and `skill_approval.rs`. ## Verification - Ran the zsh fork subcommand decline regression under this change: - `turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2` - Confirmed the test is now robust against approval-command-string variation instead of hardcoding one expected command shape. --- .../tests/suite/v2/turn_start_zsh_fork.rs | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs index 340a5e156..bdbb28343 100644 --- a/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs +++ b/codex-rs/app-server/tests/suite/v2/turn_start_zsh_fork.rs @@ -13,6 +13,7 @@ use app_test_support::create_mock_responses_server_sequence; use app_test_support::create_mock_responses_server_sequence_unchecked; use app_test_support::create_shell_command_sse_response; use app_test_support::to_response; +use codex_app_server_protocol::CommandAction; use codex_app_server_protocol::CommandExecutionApprovalDecision; use codex_app_server_protocol::CommandExecutionRequestApprovalResponse; use codex_app_server_protocol::CommandExecutionStatus; @@ -542,7 +543,10 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() CommandExecutionApprovalDecision::Cancel, ]; let mut target_decision_index = 0; - while target_decision_index < target_decisions.len() { + let first_file_str = first_file.to_string_lossy().into_owned(); + let second_file_str = second_file.to_string_lossy().into_owned(); + let parent_shell_hint = format!("&& {}", &first_file_str); + while target_decision_index < target_decisions.len() || !saw_parent_approval { let server_req = timeout( DEFAULT_READ_TIMEOUT, mcp.read_stream_until_request_message(), @@ -558,16 +562,21 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() .command .as_deref() .expect("approval command should be present"); - let is_target_subcommand = (approval_command.starts_with("/bin/rm ") - || approval_command.starts_with("/usr/bin/rm ")) - && (approval_command.contains(&first_file.display().to_string()) - || approval_command.contains(&second_file.display().to_string())); + let has_first_file = approval_command.contains(&first_file_str); + let has_second_file = approval_command.contains(&second_file_str); + let mentions_rm_binary = + approval_command.contains("/bin/rm ") || approval_command.contains("/usr/bin/rm "); + let has_rm_action = params.command_actions.as_ref().is_some_and(|actions| { + actions.iter().any(|action| match action { + CommandAction::Read { name, .. } => name == "rm", + CommandAction::Unknown { command } => command.contains("rm"), + _ => false, + }) + }); + let is_target_subcommand = + (has_first_file != has_second_file) && (has_rm_action || mentions_rm_binary); + if is_target_subcommand { - assert!( - approval_command.contains(&first_file.display().to_string()) - || approval_command.contains(&second_file.display().to_string()), - "expected zsh subcommand approval for one of the rm commands, got: {approval_command}" - ); approved_subcommand_ids.push( params .approval_id @@ -577,7 +586,9 @@ async fn turn_start_shell_zsh_fork_subcommand_decline_marks_parent_declined_v2() approved_subcommand_strings.push(approval_command.to_string()); } let is_parent_approval = approval_command.contains(&zsh_path.display().to_string()) - && approval_command.contains(&shell_command); + && (approval_command.contains(&shell_command) + || (has_first_file && has_second_file) + || approval_command.contains(&parent_shell_hint)); let decision = if is_target_subcommand { let decision = target_decisions[target_decision_index].clone(); target_decision_index += 1;