feat: record whether a skill script is approved for the session (#12756)

## Why

`unix_escalation.rs` checks a session-scoped approval cache before
prompting again for an execve-intercepted skill script. Without also
recording `ReviewDecision::ApprovedForSession`, that cache never gets
populated, so the same skill script can still trigger repeated approval
prompts within one session.

## What Changed

- Add `execve_session_approvals` to `SessionServices` so the session can
track approved skill script paths.
- Record the script path when a skill-script prompt returns
`ReviewDecision::ApprovedForSession`, but only for the skill-script path
rather than broader prefix-rule approvals.
- Reuse the cached approval on later execve callbacks by treating an
already-approved skill script as `Decision::Allow`.

---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/12756).
* #12758
* __->__ #12756
This commit is contained in:
Michael Bolin 2026-02-25 02:17:22 -08:00 committed by GitHub
parent 6d6570d89d
commit 93efcfd50d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 56 additions and 3 deletions

View file

@ -1355,6 +1355,7 @@ impl Session {
otel_manager,
models_manager: Arc::clone(&models_manager),
tool_approvals: Mutex::new(ApprovalStore::default()),
execve_session_approvals: RwLock::new(HashSet::new()),
skills_manager,
file_watcher,
agent_control,
@ -8233,6 +8234,7 @@ mod tests {
otel_manager: otel_manager.clone(),
models_manager: Arc::clone(&models_manager),
tool_approvals: Mutex::new(ApprovalStore::default()),
execve_session_approvals: RwLock::new(HashSet::new()),
skills_manager,
file_watcher,
agent_control,
@ -8390,6 +8392,7 @@ mod tests {
otel_manager: otel_manager.clone(),
models_manager: Arc::clone(&models_manager),
tool_approvals: Mutex::new(ApprovalStore::default()),
execve_session_approvals: RwLock::new(HashSet::new()),
skills_manager,
file_watcher,
agent_control,

View file

@ -1,3 +1,4 @@
use std::collections::HashSet;
use std::sync::Arc;
use crate::AuthManager;
@ -17,6 +18,7 @@ use crate::tools::sandboxing::ApprovalStore;
use crate::unified_exec::UnifiedExecProcessManager;
use codex_hooks::Hooks;
use codex_otel::OtelManager;
use codex_utils_absolute_path::AbsolutePathBuf;
use std::path::PathBuf;
use tokio::sync::Mutex;
use tokio::sync::RwLock;
@ -42,6 +44,8 @@ pub(crate) struct SessionServices {
pub(crate) models_manager: Arc<ModelsManager>,
pub(crate) otel_manager: OtelManager,
pub(crate) tool_approvals: Mutex<ApprovalStore>,
#[cfg_attr(not(unix), allow(dead_code))]
pub(crate) execve_session_approvals: RwLock<HashSet<AbsolutePathBuf>>,
pub(crate) skills_manager: Arc<SkillsManager>,
pub(crate) file_watcher: Arc<FileWatcher>,
pub(crate) agent_control: AgentControl,

View file

@ -166,6 +166,13 @@ struct CoreShellActionProvider {
stopwatch: Stopwatch,
}
enum DecisionSource {
SkillScript,
PrefixRule,
/// Often, this is `is_safe_command()`.
UnmatchedCommandFallback,
}
impl CoreShellActionProvider {
fn decision_driven_by_policy(matched_rules: &[RuleMatch], decision: Decision) -> bool {
matched_rules.iter().any(|rule_match| {
@ -233,6 +240,7 @@ impl CoreShellActionProvider {
None
}
#[allow(clippy::too_many_arguments)]
async fn process_decision(
&self,
decision: Decision,
@ -241,6 +249,7 @@ impl CoreShellActionProvider {
argv: &[String],
workdir: &AbsolutePathBuf,
additional_permissions: Option<PermissionProfile>,
decision_source: DecisionSource,
) -> anyhow::Result<EscalateAction> {
let action = match decision {
Decision::Forbidden => EscalateAction::Deny {
@ -267,8 +276,26 @@ impl CoreShellActionProvider {
.await?
{
ReviewDecision::Approved
| ReviewDecision::ApprovedExecpolicyAmendment { .. }
| ReviewDecision::ApprovedForSession => {
| ReviewDecision::ApprovedExecpolicyAmendment { .. } => {
if needs_escalation {
EscalateAction::Escalate
} else {
EscalateAction::Run
}
}
ReviewDecision::ApprovedForSession => {
// Currently, we only add session approvals for
// skill scripts because we are storing only the
// `program` whereas prefix rules may be restricted by a longer prefix.
if matches!(decision_source, DecisionSource::SkillScript) {
self.session
.services
.execve_session_approvals
.write()
.await
.insert(program.clone());
}
if needs_escalation {
EscalateAction::Escalate
} else {
@ -337,14 +364,27 @@ impl EscalationPolicy for CoreShellActionProvider {
// EscalateAction::Run case, rather than always escalating when a
// skill matches.
let needs_escalation = true;
let is_approved_for_session = self
.session
.services
.execve_session_approvals
.read()
.await
.contains(program);
let decision = if is_approved_for_session {
Decision::Allow
} else {
Decision::Prompt
};
return self
.process_decision(
Decision::Prompt,
decision,
needs_escalation,
program,
argv,
workdir,
skill.permission_profile.clone(),
DecisionSource::SkillScript,
)
.await;
}
@ -379,6 +419,11 @@ impl EscalationPolicy for CoreShellActionProvider {
let needs_escalation =
self.sandbox_permissions.requires_escalated_permissions() || decision_driven_by_policy;
let decision_source = if decision_driven_by_policy {
DecisionSource::PrefixRule
} else {
DecisionSource::UnmatchedCommandFallback
};
self.process_decision(
evaluation.decision,
needs_escalation,
@ -386,6 +431,7 @@ impl EscalationPolicy for CoreShellActionProvider {
argv,
workdir,
None,
decision_source,
)
.await
}