fix(core) Filter non-matching prefix rules (#12314)

## Summary `gpt-5.3-codex` really likes to write complicated shell scripts, and suggest a partial prefix_rule that wouldn't actually approve the command. We should only show the `prefix_rule` suggestion from the model if it would actually fully approve the command the user is seeing. This will technically cause more instances of overly-specific suggestions when we fallback, but I think the UX is clearer, particularly when the model doesn't necessarily understand the current limitations of execpolicy parsing. ## Testing - [x] Add unit tests - [x] Add integration tests
2026-02-20 22:02:35 -08:00 · 2026-02-20 22:02:35 -08:00 · a8b4b569fb
commit a8b4b569fb
parent 1779feb6a7
2 changed files with 278 additions and 13 deletions
--- a/codex-rs/core/src/exec_policy.rs
+++ b/codex-rs/core/src/exec_policy.rs
@ -224,6 +224,9 @@ impl ExecPolicyManager {
        let requested_amendment = derive_requested_execpolicy_amendment_from_prefix_rule(
            prefix_rule.as_ref(),
            &evaluation.matched_rules,
+            exec_policy.as_ref(),
+            &commands,
+            &exec_policy_fallback,
        );

        match evaluation.decision {
@ -592,6 +595,9 @@ fn try_derive_execpolicy_amendment_for_allow_rules(
 fn derive_requested_execpolicy_amendment_from_prefix_rule(
    prefix_rule: Option<&Vec<String>>,
    matched_rules: &[RuleMatch],
+    exec_policy: &Policy,
+    commands: &[Vec<String>],
+    exec_policy_fallback: &impl Fn(&[String]) -> Decision,
 ) -> Option<ExecPolicyAmendment> {
    let prefix_rule = prefix_rule?;
    if prefix_rule.is_empty() {
@ -612,7 +618,39 @@ fn derive_requested_execpolicy_amendment_from_prefix_rule(
        return None;
    }

-    Some(ExecPolicyAmendment::new(prefix_rule.clone()))
+    let amendment = ExecPolicyAmendment::new(prefix_rule.clone());
+    if prefix_rule_would_approve_all_commands(
+        exec_policy,
+        &amendment.command,
+        commands,
+        exec_policy_fallback,
+    ) {
+        Some(amendment)
+    } else {
+        None
+    }
+}
+
+fn prefix_rule_would_approve_all_commands(
+    exec_policy: &Policy,
+    prefix_rule: &[String],
+    commands: &[Vec<String>],
+    exec_policy_fallback: &impl Fn(&[String]) -> Decision,
+) -> bool {
+    let mut policy_with_prefix_rule = exec_policy.clone();
+    if policy_with_prefix_rule
+        .add_prefix_rule(prefix_rule, Decision::Allow)
+        .is_err()
+    {
+        return false;
+    }
+
+    commands.iter().all(|command| {
+        policy_with_prefix_rule
+            .check(command, exec_policy_fallback)
+            .decision
+            == Decision::Allow
+    })
 }

 /// Only return a reason when a policy rule drove the prompt decision.
@ -1125,7 +1163,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
    }

    #[tokio::test]
-    async fn keeps_requested_amendment_for_heredoc_fallback_prompts() {
+    async fn drops_requested_amendment_for_heredoc_fallback_prompts_when_it_wont_match() {
        let command = vec![
            "bash".to_string(),
            "-lc".to_string(),
@ -1147,7 +1185,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
            requirement,
            ExecApprovalRequirement::NeedsApproval {
                reason: None,
-                proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(requested_prefix)),
+                proposed_execpolicy_amendment: None,
            }
        );
    }
@ -1472,6 +1510,38 @@ prefix_rule(
        );
    }

+    #[tokio::test]
+    async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands() {
+        let command = vec![
+            "bash".to_string(),
+            "-lc".to_string(),
+            "cargo install cargo-insta && rm -rf /tmp/codex".to_string(),
+        ];
+        let manager = ExecPolicyManager::default();
+
+        let requirement = manager
+            .create_exec_approval_requirement_for_command(ExecApprovalRequest {
+                command: &command,
+                approval_policy: AskForApproval::OnRequest,
+                sandbox_policy: &SandboxPolicy::DangerFullAccess,
+                sandbox_permissions: SandboxPermissions::RequireEscalated,
+                prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
+            })
+            .await;
+
+        assert_eq!(
+            requirement,
+            ExecApprovalRequirement::NeedsApproval {
+                reason: None,
+                proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
+                    "rm".to_string(),
+                    "-rf".to_string(),
+                    "/tmp/codex".to_string(),
+                ])),
+            }
+        );
+    }
+
    #[tokio::test]
    async fn heuristics_apply_when_other_commands_match_policy() {
        let policy_src = r#"prefix_rule(pattern=["apple"], decision="allow")"#;
@ -1728,11 +1798,28 @@ prefix_rule(
        );
    }

+    fn derive_requested_execpolicy_amendment_for_test(
+        prefix_rule: Option<&Vec<String>>,
+        matched_rules: &[RuleMatch],
+    ) -> Option<ExecPolicyAmendment> {
+        let commands = prefix_rule
+            .cloned()
+            .map(|prefix_rule| vec![prefix_rule])
+            .unwrap_or_else(|| vec![vec!["echo".to_string()]]);
+        derive_requested_execpolicy_amendment_from_prefix_rule(
+            prefix_rule,
+            matched_rules,
+            &Policy::empty(),
+            &commands,
+            &|_: &[String]| Decision::Allow,
+        )
+    }
+
    #[test]
    fn derive_requested_execpolicy_amendment_returns_none_for_missing_prefix_rule() {
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(None, &[])
+            derive_requested_execpolicy_amendment_for_test(None, &[])
        );
    }

@ -1740,7 +1827,7 @@ prefix_rule(
    fn derive_requested_execpolicy_amendment_returns_none_for_empty_prefix_rule() {
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(Some(&Vec::new()), &[])
+            derive_requested_execpolicy_amendment_for_test(Some(&Vec::new()), &[])
        );
    }

@ -1748,7 +1835,7 @@ prefix_rule(
    fn derive_requested_execpolicy_amendment_returns_none_for_exact_banned_prefix_rule() {
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&vec!["python".to_string(), "-c".to_string()]),
                &[],
            )
@ -1767,7 +1854,7 @@ prefix_rule(
        ] {
            assert_eq!(
                None,
-                derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
+                derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
            );
        }
    }
@ -1793,7 +1880,7 @@ prefix_rule(
        ] {
            assert_eq!(
                None,
-                derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
+                derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
            );
        }
    }
@ -1808,7 +1895,7 @@ prefix_rule(

        assert_eq!(
            Some(ExecPolicyAmendment::new(prefix_rule.clone())),
-            derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
+            derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
        );
    }

@ -1823,7 +1910,7 @@ prefix_rule(
        }];
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&prefix_rule),
                &matched_rules_prompt
            ),
@ -1836,7 +1923,7 @@ prefix_rule(
        }];
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&prefix_rule),
                &matched_rules_allow
            ),
@ -1849,9 +1936,9 @@ prefix_rule(
        }];
        assert_eq!(
            None,
-            derive_requested_execpolicy_amendment_from_prefix_rule(
+            derive_requested_execpolicy_amendment_for_test(
                Some(&prefix_rule),
-                &matched_rules_forbidden
+                &matched_rules_forbidden,
            ),
            "should return none when prompt policy matches"
        );
--- a/codex-rs/core/tests/suite/approvals.rs
+++ b/codex-rs/core/tests/suite/approvals.rs
@ -184,6 +184,16 @@ fn shell_event(
    command: &str,
    timeout_ms: u64,
    sandbox_permissions: SandboxPermissions,
+) -> Result<Value> {
+    shell_event_with_prefix_rule(call_id, command, timeout_ms, sandbox_permissions, None)
+}
+
+fn shell_event_with_prefix_rule(
+    call_id: &str,
+    command: &str,
+    timeout_ms: u64,
+    sandbox_permissions: SandboxPermissions,
+    prefix_rule: Option<Vec<String>>,
 ) -> Result<Value> {
    let mut args = json!({
        "command": command,
@ -192,6 +202,9 @@ fn shell_event(
    if sandbox_permissions.requires_escalated_permissions() {
        args["sandbox_permissions"] = json!(sandbox_permissions);
    }
+    if let Some(prefix_rule) = prefix_rule {
+        args["prefix_rule"] = json!(prefix_rule);
+    }
    let args_str = serde_json::to_string(&args)?;
    Ok(ev_function_call(call_id, "shell_command", &args_str))
 }
@ -1928,6 +1941,171 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts
    Ok(())
 }

+#[tokio::test(flavor = "current_thread")]
+#[cfg(unix)]
+async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Result<()> {
+    let server = start_mock_server().await;
+    let approval_policy = AskForApproval::OnRequest;
+    let sandbox_policy = SandboxPolicy::new_read_only_policy();
+    let sandbox_policy_for_config = sandbox_policy.clone();
+    let mut builder = test_codex().with_config(move |config| {
+        config.permissions.approval_policy = Constrained::allow_any(approval_policy);
+        config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "invalid-prefix-rule";
+    let command =
+        "touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
+    let event = shell_event_with_prefix_rule(
+        call_id,
+        command,
+        1_000,
+        SandboxPermissions::RequireEscalated,
+        Some(vec!["touch".to_string()]),
+    )?;
+
+    let _ = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-invalid-prefix-1"),
+            event,
+            ev_completed("resp-invalid-prefix-1"),
+        ]),
+    )
+    .await;
+
+    submit_turn(
+        &test,
+        "invalid-prefix-rule",
+        approval_policy,
+        sandbox_policy.clone(),
+    )
+    .await?;
+
+    let approval = expect_exec_approval(&test, command).await;
+    let amendment = approval
+        .proposed_execpolicy_amendment
+        .expect("should have a proposed execpolicy amendment");
+    assert!(amendment.command.contains(&command.to_string()));
+
+    Ok(())
+}
+
+#[tokio::test(flavor = "current_thread")]
+#[cfg(unix)]
+async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
+    let server = start_mock_server().await;
+    let approval_policy = AskForApproval::OnRequest;
+    let sandbox_policy = SandboxPolicy::new_read_only_policy();
+    let sandbox_policy_for_config = sandbox_policy.clone();
+    let mut builder = test_codex().with_config(move |config| {
+        config.permissions.approval_policy = Constrained::allow_any(approval_policy);
+        config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
+    });
+    let test = builder.build(&server).await?;
+
+    let call_id = "invalid-prefix-rule";
+    let command =
+        "touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
+    let event = shell_event_with_prefix_rule(
+        call_id,
+        command,
+        1_000,
+        SandboxPermissions::RequireEscalated,
+        Some(vec!["touch".to_string()]),
+    )?;
+
+    let _ = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-invalid-prefix-1"),
+            event,
+            ev_completed("resp-invalid-prefix-1"),
+        ]),
+    )
+    .await;
+
+    submit_turn(
+        &test,
+        "invalid-prefix-rule",
+        approval_policy,
+        sandbox_policy.clone(),
+    )
+    .await?;
+
+    let approval = expect_exec_approval(&test, command).await;
+    let approval_id = approval.effective_approval_id();
+    let amendment = approval
+        .proposed_execpolicy_amendment
+        .expect("should have a proposed execpolicy amendment");
+    assert!(amendment.command.contains(&command.to_string()));
+
+    test.codex
+        .submit(Op::ExecApproval {
+            id: approval_id,
+            turn_id: None,
+            decision: ReviewDecision::ApprovedExecpolicyAmendment {
+                proposed_execpolicy_amendment: amendment.clone(),
+            },
+        })
+        .await?;
+    wait_for_completion(&test).await;
+
+    let call_id = "invalid-prefix-rule-again";
+    let command =
+        "touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
+    let event = shell_event_with_prefix_rule(
+        call_id,
+        command,
+        1_000,
+        SandboxPermissions::RequireEscalated,
+        Some(vec!["touch".to_string()]),
+    )?;
+
+    let _ = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_response_created("resp-invalid-prefix-1"),
+            event,
+            ev_completed("resp-invalid-prefix-1"),
+        ]),
+    )
+    .await;
+    let second_results = mount_sse_once(
+        &server,
+        sse(vec![
+            ev_assistant_message("msg-invalid-prefix-1", "done"),
+            ev_completed("resp-invalid-prefix-2"),
+        ]),
+    )
+    .await;
+
+    submit_turn(
+        &test,
+        "invalid-prefix-rule",
+        approval_policy,
+        sandbox_policy.clone(),
+    )
+    .await?;
+
+    wait_for_completion_without_approval(&test).await;
+
+    let second_output = parse_result(
+        &second_results
+            .single_request()
+            .function_call_output(call_id),
+    );
+    assert_eq!(second_output.exit_code.unwrap_or(0), 0);
+    assert!(
+        second_output.stdout.is_empty(),
+        "unexpected stdout: {}",
+        second_output.stdout
+    );
+
+    Ok(())
+}
+
 // todo(dylan) add ScenarioSpec support for rules
 #[tokio::test(flavor = "current_thread")]
 #[cfg(unix)]