fix(core) Filter non-matching prefix rules (#12314)
## Summary `gpt-5.3-codex` really likes to write complicated shell scripts, and suggest a partial prefix_rule that wouldn't actually approve the command. We should only show the `prefix_rule` suggestion from the model if it would actually fully approve the command the user is seeing. This will technically cause more instances of overly-specific suggestions when we fallback, but I think the UX is clearer, particularly when the model doesn't necessarily understand the current limitations of execpolicy parsing. ## Testing - [x] Add unit tests - [x] Add integration tests
This commit is contained in:
parent
1779feb6a7
commit
a8b4b569fb
2 changed files with 278 additions and 13 deletions
|
|
@ -224,6 +224,9 @@ impl ExecPolicyManager {
|
|||
let requested_amendment = derive_requested_execpolicy_amendment_from_prefix_rule(
|
||||
prefix_rule.as_ref(),
|
||||
&evaluation.matched_rules,
|
||||
exec_policy.as_ref(),
|
||||
&commands,
|
||||
&exec_policy_fallback,
|
||||
);
|
||||
|
||||
match evaluation.decision {
|
||||
|
|
@ -592,6 +595,9 @@ fn try_derive_execpolicy_amendment_for_allow_rules(
|
|||
fn derive_requested_execpolicy_amendment_from_prefix_rule(
|
||||
prefix_rule: Option<&Vec<String>>,
|
||||
matched_rules: &[RuleMatch],
|
||||
exec_policy: &Policy,
|
||||
commands: &[Vec<String>],
|
||||
exec_policy_fallback: &impl Fn(&[String]) -> Decision,
|
||||
) -> Option<ExecPolicyAmendment> {
|
||||
let prefix_rule = prefix_rule?;
|
||||
if prefix_rule.is_empty() {
|
||||
|
|
@ -612,7 +618,39 @@ fn derive_requested_execpolicy_amendment_from_prefix_rule(
|
|||
return None;
|
||||
}
|
||||
|
||||
Some(ExecPolicyAmendment::new(prefix_rule.clone()))
|
||||
let amendment = ExecPolicyAmendment::new(prefix_rule.clone());
|
||||
if prefix_rule_would_approve_all_commands(
|
||||
exec_policy,
|
||||
&amendment.command,
|
||||
commands,
|
||||
exec_policy_fallback,
|
||||
) {
|
||||
Some(amendment)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn prefix_rule_would_approve_all_commands(
|
||||
exec_policy: &Policy,
|
||||
prefix_rule: &[String],
|
||||
commands: &[Vec<String>],
|
||||
exec_policy_fallback: &impl Fn(&[String]) -> Decision,
|
||||
) -> bool {
|
||||
let mut policy_with_prefix_rule = exec_policy.clone();
|
||||
if policy_with_prefix_rule
|
||||
.add_prefix_rule(prefix_rule, Decision::Allow)
|
||||
.is_err()
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
commands.iter().all(|command| {
|
||||
policy_with_prefix_rule
|
||||
.check(command, exec_policy_fallback)
|
||||
.decision
|
||||
== Decision::Allow
|
||||
})
|
||||
}
|
||||
|
||||
/// Only return a reason when a policy rule drove the prompt decision.
|
||||
|
|
@ -1125,7 +1163,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn keeps_requested_amendment_for_heredoc_fallback_prompts() {
|
||||
async fn drops_requested_amendment_for_heredoc_fallback_prompts_when_it_wont_match() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
|
|
@ -1147,7 +1185,7 @@ prefix_rule(pattern=["rm"], decision="forbidden")
|
|||
requirement,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(requested_prefix)),
|
||||
proposed_execpolicy_amendment: None,
|
||||
}
|
||||
);
|
||||
}
|
||||
|
|
@ -1472,6 +1510,38 @@ prefix_rule(
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn request_rule_falls_back_when_prefix_rule_does_not_approve_all_commands() {
|
||||
let command = vec![
|
||||
"bash".to_string(),
|
||||
"-lc".to_string(),
|
||||
"cargo install cargo-insta && rm -rf /tmp/codex".to_string(),
|
||||
];
|
||||
let manager = ExecPolicyManager::default();
|
||||
|
||||
let requirement = manager
|
||||
.create_exec_approval_requirement_for_command(ExecApprovalRequest {
|
||||
command: &command,
|
||||
approval_policy: AskForApproval::OnRequest,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
sandbox_permissions: SandboxPermissions::RequireEscalated,
|
||||
prefix_rule: Some(vec!["cargo".to_string(), "install".to_string()]),
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(
|
||||
requirement,
|
||||
ExecApprovalRequirement::NeedsApproval {
|
||||
reason: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
|
||||
"rm".to_string(),
|
||||
"-rf".to_string(),
|
||||
"/tmp/codex".to_string(),
|
||||
])),
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn heuristics_apply_when_other_commands_match_policy() {
|
||||
let policy_src = r#"prefix_rule(pattern=["apple"], decision="allow")"#;
|
||||
|
|
@ -1728,11 +1798,28 @@ prefix_rule(
|
|||
);
|
||||
}
|
||||
|
||||
fn derive_requested_execpolicy_amendment_for_test(
|
||||
prefix_rule: Option<&Vec<String>>,
|
||||
matched_rules: &[RuleMatch],
|
||||
) -> Option<ExecPolicyAmendment> {
|
||||
let commands = prefix_rule
|
||||
.cloned()
|
||||
.map(|prefix_rule| vec![prefix_rule])
|
||||
.unwrap_or_else(|| vec![vec!["echo".to_string()]]);
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(
|
||||
prefix_rule,
|
||||
matched_rules,
|
||||
&Policy::empty(),
|
||||
&commands,
|
||||
&|_: &[String]| Decision::Allow,
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn derive_requested_execpolicy_amendment_returns_none_for_missing_prefix_rule() {
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(None, &[])
|
||||
derive_requested_execpolicy_amendment_for_test(None, &[])
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1740,7 +1827,7 @@ prefix_rule(
|
|||
fn derive_requested_execpolicy_amendment_returns_none_for_empty_prefix_rule() {
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(Some(&Vec::new()), &[])
|
||||
derive_requested_execpolicy_amendment_for_test(Some(&Vec::new()), &[])
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1748,7 +1835,7 @@ prefix_rule(
|
|||
fn derive_requested_execpolicy_amendment_returns_none_for_exact_banned_prefix_rule() {
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(
|
||||
derive_requested_execpolicy_amendment_for_test(
|
||||
Some(&vec!["python".to_string(), "-c".to_string()]),
|
||||
&[],
|
||||
)
|
||||
|
|
@ -1767,7 +1854,7 @@ prefix_rule(
|
|||
] {
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
|
||||
derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1793,7 +1880,7 @@ prefix_rule(
|
|||
] {
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
|
||||
derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -1808,7 +1895,7 @@ prefix_rule(
|
|||
|
||||
assert_eq!(
|
||||
Some(ExecPolicyAmendment::new(prefix_rule.clone())),
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(Some(&prefix_rule), &[])
|
||||
derive_requested_execpolicy_amendment_for_test(Some(&prefix_rule), &[])
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -1823,7 +1910,7 @@ prefix_rule(
|
|||
}];
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(
|
||||
derive_requested_execpolicy_amendment_for_test(
|
||||
Some(&prefix_rule),
|
||||
&matched_rules_prompt
|
||||
),
|
||||
|
|
@ -1836,7 +1923,7 @@ prefix_rule(
|
|||
}];
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(
|
||||
derive_requested_execpolicy_amendment_for_test(
|
||||
Some(&prefix_rule),
|
||||
&matched_rules_allow
|
||||
),
|
||||
|
|
@ -1849,9 +1936,9 @@ prefix_rule(
|
|||
}];
|
||||
assert_eq!(
|
||||
None,
|
||||
derive_requested_execpolicy_amendment_from_prefix_rule(
|
||||
derive_requested_execpolicy_amendment_for_test(
|
||||
Some(&prefix_rule),
|
||||
&matched_rules_forbidden
|
||||
&matched_rules_forbidden,
|
||||
),
|
||||
"should return none when prompt policy matches"
|
||||
);
|
||||
|
|
|
|||
|
|
@ -184,6 +184,16 @@ fn shell_event(
|
|||
command: &str,
|
||||
timeout_ms: u64,
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
) -> Result<Value> {
|
||||
shell_event_with_prefix_rule(call_id, command, timeout_ms, sandbox_permissions, None)
|
||||
}
|
||||
|
||||
fn shell_event_with_prefix_rule(
|
||||
call_id: &str,
|
||||
command: &str,
|
||||
timeout_ms: u64,
|
||||
sandbox_permissions: SandboxPermissions,
|
||||
prefix_rule: Option<Vec<String>>,
|
||||
) -> Result<Value> {
|
||||
let mut args = json!({
|
||||
"command": command,
|
||||
|
|
@ -192,6 +202,9 @@ fn shell_event(
|
|||
if sandbox_permissions.requires_escalated_permissions() {
|
||||
args["sandbox_permissions"] = json!(sandbox_permissions);
|
||||
}
|
||||
if let Some(prefix_rule) = prefix_rule {
|
||||
args["prefix_rule"] = json!(prefix_rule);
|
||||
}
|
||||
let args_str = serde_json::to_string(&args)?;
|
||||
Ok(ev_function_call(call_id, "shell_command", &args_str))
|
||||
}
|
||||
|
|
@ -1928,6 +1941,171 @@ async fn approving_execpolicy_amendment_persists_policy_and_skips_future_prompts
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
#[cfg(unix)]
|
||||
async fn invalid_requested_prefix_rule_falls_back_for_compound_command() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
||||
let sandbox_policy_for_config = sandbox_policy.clone();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
||||
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "invalid-prefix-rule";
|
||||
let command =
|
||||
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
||||
let event = shell_event_with_prefix_rule(
|
||||
call_id,
|
||||
command,
|
||||
1_000,
|
||||
SandboxPermissions::RequireEscalated,
|
||||
Some(vec!["touch".to_string()]),
|
||||
)?;
|
||||
|
||||
let _ = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-invalid-prefix-1"),
|
||||
event,
|
||||
ev_completed("resp-invalid-prefix-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"invalid-prefix-rule",
|
||||
approval_policy,
|
||||
sandbox_policy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let approval = expect_exec_approval(&test, command).await;
|
||||
let amendment = approval
|
||||
.proposed_execpolicy_amendment
|
||||
.expect("should have a proposed execpolicy amendment");
|
||||
assert!(amendment.command.contains(&command.to_string()));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
#[cfg(unix)]
|
||||
async fn approving_fallback_rule_for_compound_command_works() -> Result<()> {
|
||||
let server = start_mock_server().await;
|
||||
let approval_policy = AskForApproval::OnRequest;
|
||||
let sandbox_policy = SandboxPolicy::new_read_only_policy();
|
||||
let sandbox_policy_for_config = sandbox_policy.clone();
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.permissions.approval_policy = Constrained::allow_any(approval_policy);
|
||||
config.permissions.sandbox_policy = Constrained::allow_any(sandbox_policy_for_config);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let call_id = "invalid-prefix-rule";
|
||||
let command =
|
||||
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
||||
let event = shell_event_with_prefix_rule(
|
||||
call_id,
|
||||
command,
|
||||
1_000,
|
||||
SandboxPermissions::RequireEscalated,
|
||||
Some(vec!["touch".to_string()]),
|
||||
)?;
|
||||
|
||||
let _ = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-invalid-prefix-1"),
|
||||
event,
|
||||
ev_completed("resp-invalid-prefix-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"invalid-prefix-rule",
|
||||
approval_policy,
|
||||
sandbox_policy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
let approval = expect_exec_approval(&test, command).await;
|
||||
let approval_id = approval.effective_approval_id();
|
||||
let amendment = approval
|
||||
.proposed_execpolicy_amendment
|
||||
.expect("should have a proposed execpolicy amendment");
|
||||
assert!(amendment.command.contains(&command.to_string()));
|
||||
|
||||
test.codex
|
||||
.submit(Op::ExecApproval {
|
||||
id: approval_id,
|
||||
turn_id: None,
|
||||
decision: ReviewDecision::ApprovedExecpolicyAmendment {
|
||||
proposed_execpolicy_amendment: amendment.clone(),
|
||||
},
|
||||
})
|
||||
.await?;
|
||||
wait_for_completion(&test).await;
|
||||
|
||||
let call_id = "invalid-prefix-rule-again";
|
||||
let command =
|
||||
"touch /tmp/codex-fallback-rule-test.txt && echo hello > /tmp/codex-fallback-rule-test.txt";
|
||||
let event = shell_event_with_prefix_rule(
|
||||
call_id,
|
||||
command,
|
||||
1_000,
|
||||
SandboxPermissions::RequireEscalated,
|
||||
Some(vec!["touch".to_string()]),
|
||||
)?;
|
||||
|
||||
let _ = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-invalid-prefix-1"),
|
||||
event,
|
||||
ev_completed("resp-invalid-prefix-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let second_results = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-invalid-prefix-1", "done"),
|
||||
ev_completed("resp-invalid-prefix-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
submit_turn(
|
||||
&test,
|
||||
"invalid-prefix-rule",
|
||||
approval_policy,
|
||||
sandbox_policy.clone(),
|
||||
)
|
||||
.await?;
|
||||
|
||||
wait_for_completion_without_approval(&test).await;
|
||||
|
||||
let second_output = parse_result(
|
||||
&second_results
|
||||
.single_request()
|
||||
.function_call_output(call_id),
|
||||
);
|
||||
assert_eq!(second_output.exit_code.unwrap_or(0), 0);
|
||||
assert!(
|
||||
second_output.stdout.is_empty(),
|
||||
"unexpected stdout: {}",
|
||||
second_output.stdout
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// todo(dylan) add ScenarioSpec support for rules
|
||||
#[tokio::test(flavor = "current_thread")]
|
||||
#[cfg(unix)]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue