From f23fcd6ced0035f2aa4e34d4f12da4f04c8a7fa4 Mon Sep 17 00:00:00 2001 From: Charley Cunningham Date: Mon, 9 Mar 2026 09:25:24 -0700 Subject: [PATCH] guardian initial feedback / tweaks (#13897) ## Summary - remove the remaining model-visible guardian-specific `on-request` prompt additions so enabling the feature does not change the main approval-policy instructions - neutralize user-facing guardian wording to talk about automatic approval review / approval requests rather than a second reviewer or only sandbox escalations - tighten guardian retry-context handling so agent-authored `justification` stays in the structured action JSON and is not also injected as raw retry context - simplify guardian review plumbing in core by deleting dead prompt-append paths and trimming some request/transcript setup code ## Notable Changes - delete the dead `permissions/approval_policy/guardian.md` append path and stop threading `guardian_approval_enabled` through model-facing developer-instruction builders - rename the experimental feature copy to `Automatic approval review` and update the `/experimental` snapshot text accordingly - make approval-review status strings generic across shell, patch, network, and MCP review types - forward real sandbox/network retry reasons for shell and unified-exec guardian review, but do not pass agent-authored justification as raw retry context - simplify `guardian.rs` by removing the one-field request wrapper, deduping reasoning-effort selection, and cleaning up transcript entry collection ## Testing - `just fmt` - full validation left to CI --------- Co-authored-by: Codex --- codex-rs/core/src/codex.rs | 1 - codex-rs/core/src/context_manager/updates.rs | 1 - codex-rs/core/src/features.rs | 13 +- codex-rs/core/src/guardian.rs | 347 ++++++++++++++---- codex-rs/core/src/guardian_tests.rs | 50 +-- codex-rs/core/src/mcp_tool_call.rs | 151 +++----- codex-rs/core/src/tools/network_approval.rs | 16 +- .../core/src/tools/runtimes/apply_patch.rs | 35 +- codex-rs/core/src/tools/runtimes/shell.rs | 40 +- .../tools/runtimes/shell/unix_escalation.rs | 26 +- .../core/src/tools/runtimes/unified_exec.rs | 42 +-- .../core/tests/suite/permissions_messages.rs | 1 - codex-rs/protocol/src/models.rs | 37 +- .../permissions/approval_policy/guardian.md | 3 - ...ntal_popup_includes_guardian_approval.snap | 5 +- ...opup_includes_guardian_approval_linux.snap | 5 +- 16 files changed, 421 insertions(+), 352 deletions(-) delete mode 100644 codex-rs/protocol/src/prompts/permissions/approval_policy/guardian.md diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index ef8e7b21b..6d10e6873 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -3265,7 +3265,6 @@ impl Session { DeveloperInstructions::from_policy( turn_context.sandbox_policy.get(), turn_context.approval_policy.value(), - turn_context.features.enabled(Feature::GuardianApproval), self.services.exec_policy.current().as_ref(), &turn_context.cwd, turn_context.features.enabled(Feature::RequestPermissions), diff --git a/codex-rs/core/src/context_manager/updates.rs b/codex-rs/core/src/context_manager/updates.rs index efbb03174..63deb5c80 100644 --- a/codex-rs/core/src/context_manager/updates.rs +++ b/codex-rs/core/src/context_manager/updates.rs @@ -43,7 +43,6 @@ fn build_permissions_update_item( Some(DeveloperInstructions::from_policy( next.sandbox_policy.get(), next.approval_policy.value(), - next.features.enabled(Feature::GuardianApproval), exec_policy, &next.cwd, next.features.enabled(Feature::RequestPermissions), diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 3fa716d11..529203e1c 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -149,7 +149,7 @@ pub enum Feature { Steer, /// Allow request_user_input in Default collaboration mode. DefaultModeRequestUserInput, - /// Enable guardian subagent approvals. + /// Enable automatic review for approval prompts. GuardianApproval, /// Enable collaboration modes (Plan, Default). /// Kept for config backward compatibility; behavior is always collaboration-modes-enabled. @@ -710,8 +710,8 @@ pub const FEATURES: &[FeatureSpec] = &[ id: Feature::GuardianApproval, key: "guardian_approval", stage: Stage::Experimental { - name: "Guardian approvals", - menu_description: "Let a guardian subagent review `on-request` approval prompts instead of showing them to you, including sandbox escapes and blocked network access.", + name: "Automatic approval review", + menu_description: "Dispatch `on-request` approval prompts (for e.g. sandbox escapes or blocked network access) to a carefully-prompted security reviewer subagent rather than blocking the agent on your input.", announcement: "", }, default_enabled: false, @@ -917,11 +917,14 @@ mod tests { let stage = spec.stage; assert!(matches!(stage, Stage::Experimental { .. })); - assert_eq!(stage.experimental_menu_name(), Some("Guardian approvals")); + assert_eq!( + stage.experimental_menu_name(), + Some("Automatic approval review") + ); assert_eq!( stage.experimental_menu_description().map(str::to_owned), Some( - "Let a guardian subagent review `on-request` approval prompts instead of showing them to you, including sandbox escapes and blocked network access.".to_string() + "Dispatch `on-request` approval prompts (for e.g. sandbox escapes or blocked network access) to a carefully-prompted security reviewer subagent rather than blocking the agent on your input.".to_string() ) ); assert_eq!(stage.experimental_announcement(), None); diff --git a/codex-rs/core/src/guardian.rs b/codex-rs/core/src/guardian.rs index 385b8190f..a2c36b482 100644 --- a/codex-rs/core/src/guardian.rs +++ b/codex-rs/core/src/guardian.rs @@ -12,15 +12,19 @@ //! 4. Approve only low- and medium-risk actions (`risk_score < 80`). use std::collections::HashMap; +use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; +use codex_protocol::approvals::NetworkApprovalProtocol; +use codex_protocol::models::PermissionProfile; use codex_protocol::models::ResponseItem; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::SubAgentSource; use codex_protocol::protocol::WarningEvent; use codex_protocol::user_input::UserInput; +use codex_utils_absolute_path::AbsolutePathBuf; use serde::Deserialize; use serde::Serialize; use serde_json::Value; @@ -66,7 +70,7 @@ const GUARDIAN_RECENT_ENTRY_LIMIT: usize = 40; const GUARDIAN_TRUNCATION_TAG: &str = "guardian_truncated"; pub(crate) const GUARDIAN_REJECTION_MESSAGE: &str = concat!( - "Guardian rejected this action due to unacceptable risk. ", + "This action was rejected due to unacceptable risk. ", "The agent must not attempt to achieve the same outcome via workaround, ", "indirect execution, or policy circumvention. ", "Proceed only with a materially safer alternative, or stop and request user input.", @@ -89,12 +93,6 @@ pub(crate) fn is_guardian_subagent_source( ) } -/// Canonical description of the action the guardian is being asked to review. -#[derive(Debug, Clone, PartialEq)] -pub(crate) struct GuardianReviewRequest { - pub(crate) action: Value, -} - /// Coarse risk label paired with the numeric `risk_score`. #[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq)] #[serde(rename_all = "lowercase")] @@ -120,6 +118,66 @@ pub(crate) struct GuardianAssessment { evidence: Vec, } +#[derive(Debug, Clone, PartialEq)] +pub(crate) enum GuardianApprovalRequest { + Shell { + command: Vec, + cwd: PathBuf, + sandbox_permissions: crate::sandboxing::SandboxPermissions, + additional_permissions: Option, + justification: Option, + }, + ExecCommand { + command: Vec, + cwd: PathBuf, + sandbox_permissions: crate::sandboxing::SandboxPermissions, + additional_permissions: Option, + justification: Option, + tty: bool, + }, + #[cfg(unix)] + Execve { + tool_name: String, + program: String, + argv: Vec, + cwd: PathBuf, + additional_permissions: Option, + }, + ApplyPatch { + cwd: PathBuf, + files: Vec, + change_count: usize, + patch: String, + }, + NetworkAccess { + target: String, + host: String, + protocol: NetworkApprovalProtocol, + port: u16, + }, + McpToolCall { + server: String, + tool_name: String, + arguments: Option, + connector_id: Option, + connector_name: Option, + connector_description: Option, + tool_title: Option, + tool_description: Option, + annotations: Option, + }, +} + +#[derive(Debug, Clone, PartialEq, Eq, Serialize)] +pub(crate) struct GuardianMcpAnnotations { + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) destructive_hint: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) open_world_hint: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub(crate) read_only_hint: Option, +} + /// Transcript entry retained for guardian review after filtering. #[derive(Debug, PartialEq, Eq)] struct GuardianTranscriptEntry { @@ -164,14 +222,11 @@ impl GuardianTranscriptEntryKind { async fn run_guardian_review( session: Arc, turn: Arc, - request: GuardianReviewRequest, + request: GuardianApprovalRequest, retry_reason: Option, ) -> ReviewDecision { session - .notify_background_event( - turn.as_ref(), - "Guardian assessing approval request...".to_string(), - ) + .notify_background_event(turn.as_ref(), "Reviewing approval request...".to_string()) .await; let prompt_items = build_guardian_prompt_items(session.as_ref(), retry_reason, request).await; @@ -199,14 +254,15 @@ async fn run_guardian_review( Some(Err(err)) => GuardianAssessment { risk_level: GuardianRiskLevel::High, risk_score: 100, - rationale: format!("Guardian review failed: {err}"), + rationale: format!("Automatic approval review failed: {err}"), evidence: vec![], }, None => GuardianAssessment { risk_level: GuardianRiskLevel::High, risk_score: 100, - rationale: "Guardian review timed out while evaluating the requested approval." - .to_string(), + rationale: + "Automatic approval review timed out while evaluating the requested approval." + .to_string(), evidence: vec![], }, }; @@ -216,8 +272,7 @@ async fn run_guardian_review( // Emit a concise warning so the parent turn has an auditable summary of the // guardian decision without needing the full subagent transcript. let warning = format!( - "Guardian {verdict} approval request ({}/100, {}): {}", - assessment.risk_score, + "Automatic approval review {verdict} (risk: {}): {}", assessment.risk_level.as_str(), assessment.rationale ); @@ -239,7 +294,7 @@ async fn run_guardian_review( pub(crate) async fn review_approval_request( session: &Arc, turn: &Arc, - request: GuardianReviewRequest, + request: GuardianApprovalRequest, retry_reason: Option, ) -> ReviewDecision { run_guardian_review(Arc::clone(session), Arc::clone(turn), request, retry_reason).await @@ -256,11 +311,11 @@ pub(crate) async fn review_approval_request( async fn build_guardian_prompt_items( session: &Session, retry_reason: Option, - request: GuardianReviewRequest, + request: GuardianApprovalRequest, ) -> Vec { let history = session.clone_history().await; let transcript_entries = collect_guardian_transcript_entries(history.raw_items()); - let planned_action_json = format_guardian_action_pretty(&request.action); + let planned_action_json = format_guardian_action_pretty(&request); let (transcript_entries, omission_note) = render_guardian_transcript_entries(transcript_entries.as_slice()); @@ -400,6 +455,13 @@ fn render_guardian_transcript_entries( fn collect_guardian_transcript_entries(items: &[ResponseItem]) -> Vec { let mut entries = Vec::new(); let mut tool_names_by_call_id = HashMap::new(); + let non_empty_entry = |kind, text: String| { + (!text.trim().is_empty()).then_some(GuardianTranscriptEntry { kind, text }) + }; + let content_entry = + |kind, content| content_items_to_text(content).and_then(|text| non_empty_entry(kind, text)); + let serialized_entry = + |kind, serialized: Option| serialized.and_then(|text| non_empty_entry(kind, text)); for item in items { let entry = match item { @@ -407,25 +469,16 @@ fn collect_guardian_transcript_entries(items: &[ResponseItem]) -> Vec { - content_items_to_text(content).map(|text| GuardianTranscriptEntry { - kind: GuardianTranscriptEntryKind::Assistant, - text, - }) + content_entry(GuardianTranscriptEntryKind::Assistant, content) } - ResponseItem::LocalShellCall { action, .. } => serde_json::to_string(action) - .ok() - .filter(|text| !text.trim().is_empty()) - .map(|text| GuardianTranscriptEntry { - kind: GuardianTranscriptEntryKind::Tool("tool shell call".to_string()), - text, - }), + ResponseItem::LocalShellCall { action, .. } => serialized_entry( + GuardianTranscriptEntryKind::Tool("tool shell call".to_string()), + serde_json::to_string(action).ok(), + ), ResponseItem::FunctionCall { call_id, name, @@ -450,28 +503,26 @@ fn collect_guardian_transcript_entries(items: &[ResponseItem]) -> Vec action - .as_ref() - .and_then(|action| serde_json::to_string(action).ok()) - .filter(|text| !text.trim().is_empty()) - .map(|text| GuardianTranscriptEntry { - kind: GuardianTranscriptEntryKind::Tool("tool web_search call".to_string()), - text, - }), + ResponseItem::WebSearchCall { action, .. } => action.as_ref().and_then(|action| { + serialized_entry( + GuardianTranscriptEntryKind::Tool("tool web_search call".to_string()), + serde_json::to_string(action).ok(), + ) + }), ResponseItem::FunctionCallOutput { call_id, output } - | ResponseItem::CustomToolCallOutput { call_id, output } => output - .body - .to_text() - .filter(|text| !text.trim().is_empty()) - .map(|text| GuardianTranscriptEntry { - kind: GuardianTranscriptEntryKind::Tool( - tool_names_by_call_id.get(call_id).map_or_else( - || "tool result".to_string(), - |name| format!("tool {name} result"), + | ResponseItem::CustomToolCallOutput { call_id, output } => { + output.body.to_text().and_then(|text| { + non_empty_entry( + GuardianTranscriptEntryKind::Tool( + tool_names_by_call_id.get(call_id).map_or_else( + || "tool result".to_string(), + |name| format!("tool {name} result"), + ), ), - ), - text, - }), + text, + ) + }) + } _ => None, }; @@ -506,6 +557,13 @@ async fn run_guardian_subagent( .models_manager .list_models(crate::models_manager::manager::RefreshStrategy::Offline) .await; + let preferred_reasoning_effort = |supports_low: bool, fallback| { + if supports_low { + Some(codex_protocol::openai_models::ReasoningEffort::Low) + } else { + fallback + } + }; // Prefer `GUARDIAN_PREFERRED_MODEL` when the active provider exposes it, // but fall back to the parent turn's active model so guardian does not // become a blanket deny on providers or test environments that do not @@ -514,28 +572,23 @@ async fn run_guardian_subagent( .iter() .find(|preset| preset.model == GUARDIAN_PREFERRED_MODEL); let (guardian_model, guardian_reasoning_effort) = if let Some(preset) = preferred_model { - let reasoning_effort = if preset - .supported_reasoning_efforts - .iter() - .any(|effort| effort.effort == codex_protocol::openai_models::ReasoningEffort::Low) - { - Some(codex_protocol::openai_models::ReasoningEffort::Low) - } else { - Some(preset.default_reasoning_effort) - }; + let reasoning_effort = preferred_reasoning_effort( + preset + .supported_reasoning_efforts + .iter() + .any(|effort| effort.effort == codex_protocol::openai_models::ReasoningEffort::Low), + Some(preset.default_reasoning_effort), + ); (GUARDIAN_PREFERRED_MODEL.to_string(), reasoning_effort) } else { - let reasoning_effort = if turn - .model_info - .supported_reasoning_levels - .iter() - .any(|preset| preset.effort == codex_protocol::openai_models::ReasoningEffort::Low) - { - Some(codex_protocol::openai_models::ReasoningEffort::Low) - } else { + let reasoning_effort = preferred_reasoning_effort( + turn.model_info + .supported_reasoning_levels + .iter() + .any(|preset| preset.effort == codex_protocol::openai_models::ReasoningEffort::Low), turn.reasoning_effort - .or(turn.model_info.default_reasoning_level) - }; + .or(turn.model_info.default_reasoning_level), + ); (turn.model_info.slug.clone(), reasoning_effort) }; let guardian_config = build_guardian_subagent_config( @@ -678,9 +731,149 @@ fn truncate_guardian_action_value(value: Value) -> Value { } } -fn format_guardian_action_pretty(action: &Value) -> String { - serde_json::to_string_pretty(&truncate_guardian_action_value(action.clone())) - .unwrap_or_else(|_| "null".to_string()) +fn format_guardian_action_pretty(action: &GuardianApprovalRequest) -> String { + let mut value = match action { + GuardianApprovalRequest::Shell { + command, + cwd, + sandbox_permissions, + additional_permissions, + justification, + } => { + let mut action = serde_json::json!({ + "tool": "shell", + "command": command, + "cwd": cwd, + "sandbox_permissions": sandbox_permissions, + "additional_permissions": additional_permissions, + "justification": justification, + }); + if let Some(action) = action.as_object_mut() { + if additional_permissions.is_none() { + action.remove("additional_permissions"); + } + if justification.is_none() { + action.remove("justification"); + } + } + action + } + GuardianApprovalRequest::ExecCommand { + command, + cwd, + sandbox_permissions, + additional_permissions, + justification, + tty, + } => { + let mut action = serde_json::json!({ + "tool": "exec_command", + "command": command, + "cwd": cwd, + "sandbox_permissions": sandbox_permissions, + "additional_permissions": additional_permissions, + "justification": justification, + "tty": tty, + }); + if let Some(action) = action.as_object_mut() { + if additional_permissions.is_none() { + action.remove("additional_permissions"); + } + if justification.is_none() { + action.remove("justification"); + } + } + action + } + #[cfg(unix)] + GuardianApprovalRequest::Execve { + tool_name, + program, + argv, + cwd, + additional_permissions, + } => { + let mut action = serde_json::json!({ + "tool": tool_name, + "program": program, + "argv": argv, + "cwd": cwd, + "additional_permissions": additional_permissions, + }); + if let Some(action) = action.as_object_mut() + && additional_permissions.is_none() + { + action.remove("additional_permissions"); + } + action + } + GuardianApprovalRequest::ApplyPatch { + cwd, + files, + change_count, + patch, + } => serde_json::json!({ + "tool": "apply_patch", + "cwd": cwd, + "files": files, + "change_count": change_count, + "patch": patch, + }), + GuardianApprovalRequest::NetworkAccess { + target, + host, + protocol, + port, + } => serde_json::json!({ + "tool": "network_access", + "target": target, + "host": host, + "protocol": protocol, + "port": port, + }), + GuardianApprovalRequest::McpToolCall { + server, + tool_name, + arguments, + connector_id, + connector_name, + connector_description, + tool_title, + tool_description, + annotations, + } => { + let mut action = serde_json::json!({ + "tool": "mcp_tool_call", + "server": server, + "tool_name": tool_name, + "arguments": arguments, + "connector_id": connector_id, + "connector_name": connector_name, + "connector_description": connector_description, + "tool_title": tool_title, + "tool_description": tool_description, + "annotations": annotations, + }); + if let Some(action) = action.as_object_mut() { + for key in [ + ("arguments", arguments.is_none()), + ("connector_id", connector_id.is_none()), + ("connector_name", connector_name.is_none()), + ("connector_description", connector_description.is_none()), + ("tool_title", tool_title.is_none()), + ("tool_description", tool_description.is_none()), + ("annotations", annotations.is_none()), + ] { + if key.1 { + action.remove(key.0); + } + } + } + action + } + }; + value = truncate_guardian_action_value(value); + serde_json::to_string_pretty(&value).unwrap_or_else(|_| "null".to_string()) } fn guardian_truncate_text(content: &str, token_cap: usize) -> String { diff --git a/codex-rs/core/src/guardian_tests.rs b/codex-rs/core/src/guardian_tests.rs index e6deaf256..dd342845f 100644 --- a/codex-rs/core/src/guardian_tests.rs +++ b/codex-rs/core/src/guardian_tests.rs @@ -6,6 +6,7 @@ use crate::config_loader::FeatureRequirementsToml; use crate::config_loader::NetworkConstraints; use crate::config_loader::RequirementSource; use crate::config_loader::Sourced; +use crate::test_support; use codex_network_proxy::NetworkProxyConfig; use codex_protocol::models::ContentItem; use core_test_support::context_snapshot; @@ -22,6 +23,8 @@ use insta::assert_snapshot; use pretty_assertions::assert_eq; use std::collections::BTreeMap; use std::path::PathBuf; +use std::sync::Arc; +use tokio_util::sync::CancellationToken; #[test] fn build_guardian_transcript_keeps_original_numbering() { @@ -154,21 +157,18 @@ fn guardian_truncate_text_keeps_prefix_suffix_and_xml_marker() { #[test] fn format_guardian_action_pretty_truncates_large_string_fields() { - let action = serde_json::json!({ - "tool": "apply_patch", - "cwd": PathBuf::from("/tmp"), - "files": Vec::::new(), - "change_count": 1usize, - "patch": "line\n".repeat(10_000), - }); + let patch = "line\n".repeat(10_000); + let action = GuardianApprovalRequest::ApplyPatch { + cwd: PathBuf::from("/tmp"), + files: Vec::new(), + change_count: 1usize, + patch: patch.clone(), + }; let rendered = format_guardian_action_pretty(&action); - let original_patch = action["patch"] - .as_str() - .expect("test patch should serialize as a string"); assert!(rendered.contains("\"tool\": \"apply_patch\"")); - assert!(rendered.len() < original_patch.len()); + assert!(rendered.len() < patch.len()); } #[test] @@ -253,7 +253,7 @@ async fn guardian_review_request_layout_matches_model_visible_request_snapshot() let mut config = (*turn.config).clone(); config.model_provider.base_url = Some(format!("{}/v1", server.uri())); let config = Arc::new(config); - let models_manager = Arc::new(crate::test_support::models_manager_with_provider( + let models_manager = Arc::new(test_support::models_manager_with_provider( config.codex_home.clone(), Arc::clone(&session.services.auth_manager), config.model_provider.clone(), @@ -307,19 +307,19 @@ async fn guardian_review_request_layout_matches_model_visible_request_snapshot() let prompt = build_guardian_prompt_items( session.as_ref(), Some("Sandbox denied outbound git push to github.com.".to_string()), - GuardianReviewRequest { - action: serde_json::json!({ - "tool": "shell", - "command": [ - "git", - "push", - "origin", - "guardian-approval-mvp" - ], - "cwd": "/repo/codex-rs/core", - "sandbox_permissions": crate::sandboxing::SandboxPermissions::UseDefault, - "justification": "Need to push the reviewed docs fix to the repo remote.", - }), + GuardianApprovalRequest::Shell { + command: vec![ + "git".to_string(), + "push".to_string(), + "origin".to_string(), + "guardian-approval-mvp".to_string(), + ], + cwd: PathBuf::from("/repo/codex-rs/core"), + sandbox_permissions: crate::sandboxing::SandboxPermissions::UseDefault, + additional_permissions: None, + justification: Some( + "Need to push the reviewed docs fix to the repo remote.".to_string(), + ), }, ) .await; diff --git a/codex-rs/core/src/mcp_tool_call.rs b/codex-rs/core/src/mcp_tool_call.rs index be0ca35af..29d1de5b6 100644 --- a/codex-rs/core/src/mcp_tool_call.rs +++ b/codex-rs/core/src/mcp_tool_call.rs @@ -18,7 +18,8 @@ use crate::config::edit::ConfigEditsBuilder; use crate::config::types::AppToolApproval; use crate::connectors; use crate::features::Feature; -use crate::guardian::GuardianReviewRequest; +use crate::guardian::GuardianApprovalRequest; +use crate::guardian::GuardianMcpAnnotations; use crate::guardian::review_approval_request; use crate::guardian::routes_approval_to_guardian; use crate::mcp::CODEX_APPS_MCP_SERVER_NAME; @@ -574,88 +575,23 @@ fn persistent_mcp_tool_approval_key( fn build_guardian_mcp_tool_review_request( invocation: &McpInvocation, metadata: Option<&McpToolApprovalMetadata>, -) -> GuardianReviewRequest { - let mut action = serde_json::Map::from_iter([ - ( - "tool".to_string(), - serde_json::Value::String("mcp_tool_call".to_string()), - ), - ( - "server".to_string(), - serde_json::Value::String(invocation.server.clone()), - ), - ( - "tool_name".to_string(), - serde_json::Value::String(invocation.tool.clone()), - ), - ]); - - if let Some(arguments) = invocation.arguments.clone() { - action.insert("arguments".to_string(), arguments); - } - - if let Some(metadata) = metadata { - if let Some(connector_id) = metadata.connector_id.as_ref() { - action.insert( - "connector_id".to_string(), - serde_json::Value::String(connector_id.clone()), - ); - } - if let Some(connector_name) = metadata.connector_name.as_ref() { - action.insert( - "connector_name".to_string(), - serde_json::Value::String(connector_name.clone()), - ); - } - if let Some(connector_description) = metadata.connector_description.as_ref() { - action.insert( - "connector_description".to_string(), - serde_json::Value::String(connector_description.clone()), - ); - } - if let Some(tool_title) = metadata.tool_title.as_ref() { - action.insert( - "tool_title".to_string(), - serde_json::Value::String(tool_title.clone()), - ); - } - if let Some(tool_description) = metadata.tool_description.as_ref() { - action.insert( - "tool_description".to_string(), - serde_json::Value::String(tool_description.clone()), - ); - } - if let Some(annotations) = metadata.annotations.as_ref() { - let mut annotation_map = serde_json::Map::new(); - if let Some(destructive_hint) = annotations.destructive_hint { - annotation_map.insert( - "destructive_hint".to_string(), - serde_json::Value::Bool(destructive_hint), - ); - } - if let Some(open_world_hint) = annotations.open_world_hint { - annotation_map.insert( - "open_world_hint".to_string(), - serde_json::Value::Bool(open_world_hint), - ); - } - if let Some(read_only_hint) = annotations.read_only_hint { - annotation_map.insert( - "read_only_hint".to_string(), - serde_json::Value::Bool(read_only_hint), - ); - } - if !annotation_map.is_empty() { - action.insert( - "annotations".to_string(), - serde_json::Value::Object(annotation_map), - ); - } - } - } - - GuardianReviewRequest { - action: serde_json::Value::Object(action), +) -> GuardianApprovalRequest { + GuardianApprovalRequest::McpToolCall { + server: invocation.server.clone(), + tool_name: invocation.tool.clone(), + arguments: invocation.arguments.clone(), + connector_id: metadata.and_then(|metadata| metadata.connector_id.clone()), + connector_name: metadata.and_then(|metadata| metadata.connector_name.clone()), + connector_description: metadata.and_then(|metadata| metadata.connector_description.clone()), + tool_title: metadata.and_then(|metadata| metadata.tool_title.clone()), + tool_description: metadata.and_then(|metadata| metadata.tool_description.clone()), + annotations: metadata + .and_then(|metadata| metadata.annotations.as_ref()) + .map(|annotations| GuardianMcpAnnotations { + destructive_hint: annotations.destructive_hint, + open_world_hint: annotations.open_world_hint, + read_only_hint: annotations.read_only_hint, + }), } } @@ -1599,20 +1535,18 @@ mod tests { assert_eq!( request, - GuardianReviewRequest { - action: serde_json::json!({ - "tool": "mcp_tool_call", - "server": CODEX_APPS_MCP_SERVER_NAME, - "tool_name": "browser_navigate", - "arguments": { - "url": "https://example.com", - }, - "connector_id": "playwright", - "connector_name": "Playwright", - "connector_description": "Browser automation", - "tool_title": "Navigate", - "tool_description": "Open a page", - }), + GuardianApprovalRequest::McpToolCall { + server: CODEX_APPS_MCP_SERVER_NAME.to_string(), + tool_name: "browser_navigate".to_string(), + arguments: Some(serde_json::json!({ + "url": "https://example.com", + })), + connector_id: Some("playwright".to_string()), + connector_name: Some("Playwright".to_string()), + connector_description: Some("Browser automation".to_string()), + tool_title: Some("Navigate".to_string()), + tool_description: Some("Open a page".to_string()), + annotations: None, } ); } @@ -1637,16 +1571,19 @@ mod tests { assert_eq!( request, - GuardianReviewRequest { - action: serde_json::json!({ - "tool": "mcp_tool_call", - "server": "custom_server", - "tool_name": "dangerous_tool", - "annotations": { - "destructive_hint": true, - "open_world_hint": true, - "read_only_hint": false, - }, + GuardianApprovalRequest::McpToolCall { + server: "custom_server".to_string(), + tool_name: "dangerous_tool".to_string(), + arguments: None, + connector_id: None, + connector_name: None, + connector_description: None, + tool_title: None, + tool_description: None, + annotations: Some(GuardianMcpAnnotations { + destructive_hint: Some(true), + open_world_hint: Some(true), + read_only_hint: Some(false), }), } ); diff --git a/codex-rs/core/src/tools/network_approval.rs b/codex-rs/core/src/tools/network_approval.rs index cd223b169..78f1bb0f4 100644 --- a/codex-rs/core/src/tools/network_approval.rs +++ b/codex-rs/core/src/tools/network_approval.rs @@ -1,6 +1,6 @@ use crate::codex::Session; use crate::guardian::GUARDIAN_REJECTION_MESSAGE; -use crate::guardian::GuardianReviewRequest; +use crate::guardian::GuardianApprovalRequest; use crate::guardian::review_approval_request; use crate::guardian::routes_approval_to_guardian; use crate::network_policy_decision::denied_network_policy_message; @@ -21,7 +21,6 @@ use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::ReviewDecision; use codex_protocol::protocol::WarningEvent; use indexmap::IndexMap; -use serde_json::json; use std::collections::HashMap; use std::collections::HashSet; use std::sync::Arc; @@ -344,14 +343,11 @@ impl NetworkApprovalService { review_approval_request( &session, &turn_context, - GuardianReviewRequest { - action: json!({ - "tool": "network_access", - "target": target, - "host": request.host, - "protocol": key.protocol, - "port": key.port, - }), + GuardianApprovalRequest::NetworkAccess { + target, + host: request.host, + protocol, + port: key.port, }, Some(policy_denial_message.clone()), ) diff --git a/codex-rs/core/src/tools/runtimes/apply_patch.rs b/codex-rs/core/src/tools/runtimes/apply_patch.rs index e745dd370..516374179 100644 --- a/codex-rs/core/src/tools/runtimes/apply_patch.rs +++ b/codex-rs/core/src/tools/runtimes/apply_patch.rs @@ -5,7 +5,7 @@ //! `codex --codex-run-as-apply-patch`, and runs under the current //! `SandboxAttempt` with a minimal environment. use crate::exec::ExecToolCallOutput; -use crate::guardian::GuardianReviewRequest; +use crate::guardian::GuardianApprovalRequest; use crate::guardian::review_approval_request; use crate::guardian::routes_approval_to_guardian; use crate::sandboxing::CommandSpec; @@ -28,7 +28,6 @@ use codex_protocol::protocol::FileChange; use codex_protocol::protocol::ReviewDecision; use codex_utils_absolute_path::AbsolutePathBuf; use futures::future::BoxFuture; -use serde_json::json; use std::collections::HashMap; use std::path::PathBuf; @@ -50,15 +49,12 @@ impl ApplyPatchRuntime { Self } - fn build_guardian_review_request(req: &ApplyPatchRequest) -> GuardianReviewRequest { - GuardianReviewRequest { - action: json!({ - "tool": "apply_patch", - "cwd": req.action.cwd, - "files": req.file_paths, - "change_count": req.changes.len(), - "patch": req.action.patch, - }), + fn build_guardian_review_request(req: &ApplyPatchRequest) -> GuardianApprovalRequest { + GuardianApprovalRequest::ApplyPatch { + cwd: req.action.cwd.clone(), + files: req.file_paths.clone(), + change_count: req.changes.len(), + patch: req.action.patch.clone(), } } @@ -135,8 +131,8 @@ impl Approvable for ApplyPatchRuntime { let changes = req.changes.clone(); Box::pin(async move { if routes_approval_to_guardian(turn) { - let request = ApplyPatchRuntime::build_guardian_review_request(req); - return review_approval_request(session, turn, request, retry_reason).await; + let action = ApplyPatchRuntime::build_guardian_review_request(req); + return review_approval_request(session, turn, action, retry_reason).await; } if let Some(reason) = retry_reason { let rx_approve = session @@ -256,14 +252,11 @@ mod tests { assert_eq!( guardian_request, - GuardianReviewRequest { - action: json!({ - "tool": "apply_patch", - "cwd": expected_cwd, - "files": request.file_paths, - "change_count": 1usize, - "patch": expected_patch, - }), + GuardianApprovalRequest::ApplyPatch { + cwd: expected_cwd, + files: request.file_paths, + change_count: 1usize, + patch: expected_patch, } ); } diff --git a/codex-rs/core/src/tools/runtimes/shell.rs b/codex-rs/core/src/tools/runtimes/shell.rs index 388573eb4..dea0aa202 100644 --- a/codex-rs/core/src/tools/runtimes/shell.rs +++ b/codex-rs/core/src/tools/runtimes/shell.rs @@ -11,7 +11,7 @@ pub(crate) mod zsh_fork_backend; use crate::command_canonicalization::canonicalize_command_for_approval; use crate::exec::ExecToolCallOutput; use crate::features::Feature; -use crate::guardian::GuardianReviewRequest; +use crate::guardian::GuardianApprovalRequest; use crate::guardian::review_approval_request; use crate::guardian::routes_approval_to_guardian; use crate::powershell::prefix_powershell_script_with_utf8; @@ -38,7 +38,6 @@ use codex_network_proxy::NetworkProxy; use codex_protocol::models::PermissionProfile; use codex_protocol::protocol::ReviewDecision; use futures::future::BoxFuture; -use serde_json::json; use std::collections::HashMap; use std::path::PathBuf; @@ -145,33 +144,26 @@ impl Approvable for ShellRuntime { let keys = self.approval_keys(req); let command = req.command.clone(); let cwd = req.cwd.clone(); - let reason = ctx - .retry_reason - .clone() - .or_else(|| req.justification.clone()); + let retry_reason = ctx.retry_reason.clone(); + let reason = retry_reason.clone().or_else(|| req.justification.clone()); let session = ctx.session; let turn = ctx.turn; let call_id = ctx.call_id.to_string(); Box::pin(async move { if routes_approval_to_guardian(turn) { - let mut action = json!({ - "tool": "shell", - "command": command, - "cwd": cwd, - "sandbox_permissions": req.sandbox_permissions, - "additional_permissions": req.additional_permissions, - "justification": reason, - }); - if let Some(action) = action.as_object_mut() { - if req.additional_permissions.is_none() { - action.remove("additional_permissions"); - } - if reason.is_none() { - action.remove("justification"); - } - } - let request = GuardianReviewRequest { action }; - return review_approval_request(session, turn, request, None).await; + return review_approval_request( + session, + turn, + GuardianApprovalRequest::Shell { + command, + cwd, + sandbox_permissions: req.sandbox_permissions, + additional_permissions: req.additional_permissions.clone(), + justification: req.justification.clone(), + }, + retry_reason, + ) + .await; } with_cached_approval(&session.services, "shell", keys, move || async move { let available_decisions = None; diff --git a/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs b/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs index 35db88d96..04732b00c 100644 --- a/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs +++ b/codex-rs/core/src/tools/runtimes/shell/unix_escalation.rs @@ -7,7 +7,7 @@ use crate::exec::SandboxType; use crate::exec::is_likely_sandbox_denied; use crate::exec_policy::prompt_is_rejected_by_policy; use crate::features::Feature; -use crate::guardian::GuardianReviewRequest; +use crate::guardian::GuardianApprovalRequest; use crate::guardian::review_approval_request; use crate::guardian::routes_approval_to_guardian; use crate::sandboxing::ExecRequest; @@ -50,7 +50,6 @@ use codex_shell_escalation::PreparedExec; use codex_shell_escalation::ShellCommandExecutor; use codex_shell_escalation::Stopwatch; use codex_utils_absolute_path::AbsolutePathBuf; -use serde_json::json; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; @@ -386,16 +385,19 @@ impl CoreShellActionProvider { Ok(stopwatch .pause_for(async move { if routes_approval_to_guardian(&turn) { - let request = GuardianReviewRequest { - action: json!({ - "tool": tool_name, - "program": program, - "argv": argv, - "cwd": workdir, - "additional_permissions": additional_permissions, - }), - }; - return review_approval_request(&session, &turn, request, None).await; + return review_approval_request( + &session, + &turn, + GuardianApprovalRequest::Execve { + tool_name: tool_name.to_string(), + program: program.to_string_lossy().into_owned(), + argv: argv.to_vec(), + cwd: workdir, + additional_permissions, + }, + None, + ) + .await; } let available_decisions = vec![ Some(ReviewDecision::Approved), diff --git a/codex-rs/core/src/tools/runtimes/unified_exec.rs b/codex-rs/core/src/tools/runtimes/unified_exec.rs index 3afbd9ce9..48052a537 100644 --- a/codex-rs/core/src/tools/runtimes/unified_exec.rs +++ b/codex-rs/core/src/tools/runtimes/unified_exec.rs @@ -9,7 +9,7 @@ use crate::error::CodexErr; use crate::error::SandboxErr; use crate::exec::ExecExpiration; use crate::features::Feature; -use crate::guardian::GuardianReviewRequest; +use crate::guardian::GuardianApprovalRequest; use crate::guardian::review_approval_request; use crate::guardian::routes_approval_to_guardian; use crate::powershell::prefix_powershell_script_with_utf8; @@ -41,7 +41,6 @@ use codex_network_proxy::NetworkProxy; use codex_protocol::models::PermissionProfile; use codex_protocol::protocol::ReviewDecision; use futures::future::BoxFuture; -use serde_json::json; use std::collections::HashMap; use std::path::PathBuf; @@ -113,31 +112,24 @@ impl Approvable for UnifiedExecRuntime<'_> { let call_id = ctx.call_id.to_string(); let command = req.command.clone(); let cwd = req.cwd.clone(); - let reason = ctx - .retry_reason - .clone() - .or_else(|| req.justification.clone()); + let retry_reason = ctx.retry_reason.clone(); + let reason = retry_reason.clone().or_else(|| req.justification.clone()); Box::pin(async move { if routes_approval_to_guardian(turn) { - let mut action = json!({ - "tool": "exec_command", - "command": command, - "cwd": cwd, - "sandbox_permissions": req.sandbox_permissions, - "additional_permissions": req.additional_permissions, - "justification": reason, - "tty": req.tty, - }); - if let Some(action) = action.as_object_mut() { - if req.additional_permissions.is_none() { - action.remove("additional_permissions"); - } - if reason.is_none() { - action.remove("justification"); - } - } - let request = GuardianReviewRequest { action }; - return review_approval_request(session, turn, request, None).await; + return review_approval_request( + session, + turn, + GuardianApprovalRequest::ExecCommand { + command, + cwd, + sandbox_permissions: req.sandbox_permissions, + additional_permissions: req.additional_permissions.clone(), + justification: req.justification.clone(), + tty: req.tty, + }, + retry_reason, + ) + .await; } with_cached_approval(&session.services, "unified_exec", keys, || async move { let available_decisions = None; diff --git a/codex-rs/core/tests/suite/permissions_messages.rs b/codex-rs/core/tests/suite/permissions_messages.rs index f8df52a3a..8dfe2b5e6 100644 --- a/codex-rs/core/tests/suite/permissions_messages.rs +++ b/codex-rs/core/tests/suite/permissions_messages.rs @@ -490,7 +490,6 @@ async fn permissions_message_includes_writable_roots() -> Result<()> { let expected = DeveloperInstructions::from_policy( &sandbox_policy, AskForApproval::OnRequest, - false, &Policy::empty(), test.config.cwd.as_path(), false, diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 88a47f206..a34eeeb8e 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -408,8 +408,6 @@ const APPROVAL_POLICY_ON_REQUEST_RULE: &str = include_str!("prompts/permissions/approval_policy/on_request_rule.md"); const APPROVAL_POLICY_ON_REQUEST_RULE_REQUEST_PERMISSION: &str = include_str!("prompts/permissions/approval_policy/on_request_rule_request_permission.md"); -const GUARDIAN_APPROVAL_FEATURE: &str = - include_str!("prompts/permissions/approval_policy/guardian.md"); const SANDBOX_MODE_DANGER_FULL_ACCESS: &str = include_str!("prompts/permissions/sandbox_mode/danger_full_access.md"); @@ -427,7 +425,6 @@ impl DeveloperInstructions { pub fn from( approval_policy: AskForApproval, - guardian_approval_enabled: bool, exec_policy: &Policy, request_permission_enabled: bool, ) -> DeveloperInstructions { @@ -451,14 +448,7 @@ impl DeveloperInstructions { AskForApproval::Never => APPROVAL_POLICY_NEVER.to_string(), AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.to_string(), AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.to_string(), - AskForApproval::OnRequest => { - let mut instructions = on_request_instructions(); - if guardian_approval_enabled { - instructions.push_str("\n\n"); - instructions.push_str(GUARDIAN_APPROVAL_FEATURE); - } - instructions - } + AskForApproval::OnRequest => on_request_instructions(), AskForApproval::Reject(reject_config) => { let on_request_instructions = on_request_instructions(); let sandbox_approval = reject_config.sandbox_approval; @@ -521,7 +511,6 @@ impl DeveloperInstructions { pub fn from_policy( sandbox_policy: &SandboxPolicy, approval_policy: AskForApproval, - guardian_approval_enabled: bool, exec_policy: &Policy, cwd: &Path, request_permission_enabled: bool, @@ -546,7 +535,6 @@ impl DeveloperInstructions { sandbox_mode, network_access, approval_policy, - guardian_approval_enabled, exec_policy, writable_roots, request_permission_enabled, @@ -571,7 +559,6 @@ impl DeveloperInstructions { sandbox_mode: SandboxMode, network_access: NetworkAccess, approval_policy: AskForApproval, - guardian_approval_enabled: bool, exec_policy: &Policy, writable_roots: Option>, request_permission_enabled: bool, @@ -585,7 +572,6 @@ impl DeveloperInstructions { )) .concat(DeveloperInstructions::from( approval_policy, - guardian_approval_enabled, exec_policy, request_permission_enabled, )) @@ -1667,7 +1653,6 @@ mod tests { SandboxMode::WorkspaceWrite, NetworkAccess::Enabled, AskForApproval::OnRequest, - false, &Policy::empty(), None, false, @@ -1697,7 +1682,6 @@ mod tests { let instructions = DeveloperInstructions::from_policy( &policy, AskForApproval::UnlessTrusted, - false, &Policy::empty(), &PathBuf::from("/tmp"), false, @@ -1720,7 +1704,6 @@ mod tests { SandboxMode::WorkspaceWrite, NetworkAccess::Enabled, AskForApproval::OnRequest, - false, &exec_policy, None, false, @@ -1738,7 +1721,6 @@ mod tests { SandboxMode::WorkspaceWrite, NetworkAccess::Enabled, AskForApproval::OnRequest, - false, &Policy::empty(), None, true, @@ -1749,23 +1731,6 @@ mod tests { assert!(text.contains("additional_permissions")); } - #[test] - fn includes_guardian_feature_guidance_for_on_request_when_enabled() { - let instructions = DeveloperInstructions::from_permissions_with_network( - SandboxMode::WorkspaceWrite, - NetworkAccess::Enabled, - AskForApproval::OnRequest, - true, - &Policy::empty(), - None, - false, - ); - - let text = instructions.into_text(); - assert!(text.contains("guardian subagent")); - assert!(text.contains("approval prompts")); - } - #[test] fn render_command_prefix_list_sorts_by_len_then_total_len_then_alphabetical() { let prefixes = vec![ diff --git a/codex-rs/protocol/src/prompts/permissions/approval_policy/guardian.md b/codex-rs/protocol/src/prompts/permissions/approval_policy/guardian.md deleted file mode 100644 index 779dfdcf8..000000000 --- a/codex-rs/protocol/src/prompts/permissions/approval_policy/guardian.md +++ /dev/null @@ -1,3 +0,0 @@ -Guardian approvals are enabled. While `approval_policy` is still `on-request`, approval prompts are routed to a guardian subagent instead of the user. Use `sandbox_permissions: "require_escalated"` with a concise `justification` when you need unsandboxed execution, and use `sandbox_permissions: "with_additional_permissions"` plus `additional_permissions` when you need broader sandboxed access. Codex will ask the guardian subagent to assess the risk automatically. - -Do not message the user before requesting escalation. If the guardian rejects an action, do not attempt the same outcome via workaround, indirect execution, or policy circumvention. Either choose a materially safer alternative or stop and ask the user for guidance. diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval.snap index 55093f626..eec745ad5 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval.snap @@ -11,8 +11,9 @@ expression: popup [ ] Multi-agents Ask Codex to spawn multiple agents to parallelize the work and win in efficiency. [ ] Apps Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart Codex after enabling. - [ ] Guardian approvals Let a guardian subagent review `on-request` approval prompts instead of showing - them to you, including sandbox escapes and blocked network access. + [ ] Automatic approval review Dispatch `on-request` approval prompts (for e.g. sandbox escapes or blocked network + access) to a carefully-prompted security reviewer subagent rather than blocking the + agent on your input. [ ] Prevent sleep while running Keep your computer awake while Codex is running a thread. Press space to select or enter to save for next conversation diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval_linux.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval_linux.snap index 6f51ad7a9..cddafbe30 100644 --- a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval_linux.snap +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__experimental_popup_includes_guardian_approval_linux.snap @@ -12,8 +12,9 @@ expression: popup [ ] Multi-agents Ask Codex to spawn multiple agents to parallelize the work and win in efficiency. [ ] Apps Use a connected ChatGPT App using "$". Install Apps via /apps command. Restart Codex after enabling. - [ ] Guardian approvals Let a guardian subagent review `on-request` approval prompts instead of showing - them to you, including sandbox escapes and blocked network access. + [ ] Automatic approval review Dispatch `on-request` approval prompts (for e.g. sandbox escapes or blocked network + access) to a carefully-prompted security reviewer subagent rather than blocking the + agent on your input. [ ] Prevent sleep while running Keep your computer awake while Codex is running a thread. Press space to select or enter to save for next conversation