add(core): arc_monitor (#13936)

## Summary
- add ARC monitor support for MCP tool calls by serializing MCP approval
requests into the ARC action shape and sending the relevant
conversation/policy context to the `/api/codex/safety/arc` endpoint
- route ARC outcomes back into MCP approval flow so `ask-user` falls
back to a user prompt and `steer-model` blocks the tool call, with
guardian/ARC tests covering the new request shape
- update the TUI approval copy from “Approve Once” to “Allow” / “Allow
for this session” and refresh the related
  snapshots

---------

Co-authored-by: Fouad Matin <fouad@openai.com>
Co-authored-by: Fouad Matin <169186268+fouad-openai@users.noreply.github.com>
This commit is contained in:
Matthew Zeng 2026-03-10 13:16:47 -07:00 committed by Michael Bolin
parent b7f8e9195a
commit f2d66fadd8
9 changed files with 1157 additions and 30 deletions

View file

@ -0,0 +1,862 @@
use std::env;
use std::time::Duration;
use serde::Deserialize;
use serde::Serialize;
use tracing::warn;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::compact::content_items_to_text;
use crate::default_client::build_reqwest_client;
use crate::event_mapping::is_contextual_user_message_content;
use codex_protocol::models::MessagePhase;
use codex_protocol::models::ResponseItem;
const ARC_MONITOR_TIMEOUT: Duration = Duration::from_secs(30);
const CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE: &str = "CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE";
const CODEX_ARC_MONITOR_TOKEN: &str = "CODEX_ARC_MONITOR_TOKEN";
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) enum ArcMonitorOutcome {
Ok,
SteerModel(String),
AskUser(String),
}
#[derive(Debug, Serialize, PartialEq)]
struct ArcMonitorRequest {
metadata: ArcMonitorMetadata,
#[serde(skip_serializing_if = "Option::is_none")]
messages: Option<Vec<ArcMonitorChatMessage>>,
#[serde(skip_serializing_if = "Option::is_none")]
input: Option<Vec<ResponseItem>>,
#[serde(skip_serializing_if = "Option::is_none")]
policies: Option<ArcMonitorPolicies>,
action: serde_json::Map<String, serde_json::Value>,
}
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
struct ArcMonitorResult {
outcome: ArcMonitorResultOutcome,
short_reason: String,
rationale: String,
risk_score: u8,
risk_level: ArcMonitorRiskLevel,
evidence: Vec<ArcMonitorEvidence>,
}
#[derive(Debug, Serialize, PartialEq)]
struct ArcMonitorChatMessage {
role: String,
content: serde_json::Value,
}
#[derive(Debug, Serialize, PartialEq)]
struct ArcMonitorPolicies {
user: Option<String>,
developer: Option<String>,
}
#[derive(Debug, Serialize, PartialEq)]
#[serde(deny_unknown_fields)]
struct ArcMonitorMetadata {
codex_thread_id: String,
codex_turn_id: String,
#[serde(skip_serializing_if = "Option::is_none")]
conversation_id: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
protection_client_callsite: Option<String>,
}
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
#[allow(dead_code)]
struct ArcMonitorEvidence {
message: String,
why: String,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
enum ArcMonitorResultOutcome {
Ok,
SteerModel,
AskUser,
}
#[derive(Debug, Deserialize)]
#[serde(rename_all = "lowercase")]
enum ArcMonitorRiskLevel {
Low,
Medium,
High,
Critical,
}
pub(crate) async fn monitor_action(
sess: &Session,
turn_context: &TurnContext,
action: serde_json::Value,
) -> ArcMonitorOutcome {
let auth = match turn_context.auth_manager.as_ref() {
Some(auth_manager) => match auth_manager.auth().await {
Some(auth) if auth.is_chatgpt_auth() => Some(auth),
_ => None,
},
None => None,
};
let token = if let Some(token) = read_non_empty_env_var(CODEX_ARC_MONITOR_TOKEN) {
token
} else {
let Some(auth) = auth.as_ref() else {
return ArcMonitorOutcome::Ok;
};
match auth.get_token() {
Ok(token) => token,
Err(err) => {
warn!(
error = %err,
"skipping safety monitor because auth token is unavailable"
);
return ArcMonitorOutcome::Ok;
}
}
};
let url = read_non_empty_env_var(CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE).unwrap_or_else(|| {
format!(
"{}/api/codex/safety/arc",
turn_context.config.chatgpt_base_url.trim_end_matches('/')
)
});
let action = match action {
serde_json::Value::Object(action) => action,
_ => {
warn!("skipping safety monitor because action payload is not an object");
return ArcMonitorOutcome::Ok;
}
};
let body = build_arc_monitor_request(sess, turn_context, action).await;
let client = build_reqwest_client();
let mut request = client
.post(&url)
.timeout(ARC_MONITOR_TIMEOUT)
.json(&body)
.bearer_auth(token);
if let Some(account_id) = auth
.as_ref()
.and_then(crate::auth::CodexAuth::get_account_id)
{
request = request.header("chatgpt-account-id", account_id);
}
let response = match request.send().await {
Ok(response) => response,
Err(err) => {
warn!(error = %err, %url, "safety monitor request failed");
return ArcMonitorOutcome::Ok;
}
};
let status = response.status();
if !status.is_success() {
let response_text = response.text().await.unwrap_or_default();
warn!(
%status,
%url,
response_text,
"safety monitor returned non-success status"
);
return ArcMonitorOutcome::Ok;
}
let response = match response.json::<ArcMonitorResult>().await {
Ok(response) => response,
Err(err) => {
warn!(error = %err, %url, "failed to parse safety monitor response");
return ArcMonitorOutcome::Ok;
}
};
tracing::debug!(
risk_score = response.risk_score,
risk_level = ?response.risk_level,
evidence_count = response.evidence.len(),
"safety monitor completed"
);
let short_reason = response.short_reason.trim();
let rationale = response.rationale.trim();
match response.outcome {
ArcMonitorResultOutcome::Ok => ArcMonitorOutcome::Ok,
ArcMonitorResultOutcome::AskUser => {
if !short_reason.is_empty() {
ArcMonitorOutcome::AskUser(short_reason.to_string())
} else if !rationale.is_empty() {
ArcMonitorOutcome::AskUser(rationale.to_string())
} else {
ArcMonitorOutcome::AskUser(
"Additional confirmation is required before this tool call can continue."
.to_string(),
)
}
}
ArcMonitorResultOutcome::SteerModel => {
if !rationale.is_empty() {
ArcMonitorOutcome::SteerModel(rationale.to_string())
} else if !short_reason.is_empty() {
ArcMonitorOutcome::SteerModel(short_reason.to_string())
} else {
ArcMonitorOutcome::SteerModel(
"Tool call was cancelled because of safety risks.".to_string(),
)
}
}
}
}
fn read_non_empty_env_var(key: &str) -> Option<String> {
match env::var(key) {
Ok(value) => {
let value = value.trim();
(!value.is_empty()).then(|| value.to_string())
}
Err(env::VarError::NotPresent) => None,
Err(env::VarError::NotUnicode(_)) => {
warn!(
env_var = key,
"ignoring non-unicode safety monitor env override"
);
None
}
}
}
async fn build_arc_monitor_request(
sess: &Session,
turn_context: &TurnContext,
action: serde_json::Map<String, serde_json::Value>,
) -> ArcMonitorRequest {
let history = sess.clone_history().await;
let mut messages = build_arc_monitor_messages(history.raw_items());
if messages.is_empty() {
messages.push(build_arc_monitor_message(
"user",
serde_json::Value::String(
"No prior conversation history is available for this ARC evaluation.".to_string(),
),
));
}
let conversation_id = sess.conversation_id.to_string();
ArcMonitorRequest {
metadata: ArcMonitorMetadata {
codex_thread_id: conversation_id.clone(),
codex_turn_id: turn_context.sub_id.clone(),
conversation_id: Some(conversation_id),
protection_client_callsite: None,
},
messages: Some(messages),
input: None,
policies: Some(ArcMonitorPolicies {
user: None,
developer: None,
}),
action,
}
}
fn build_arc_monitor_messages(items: &[ResponseItem]) -> Vec<ArcMonitorChatMessage> {
let last_tool_call_index = items
.iter()
.enumerate()
.rev()
.find(|(_, item)| {
matches!(
item,
ResponseItem::LocalShellCall { .. }
| ResponseItem::FunctionCall { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::WebSearchCall { .. }
)
})
.map(|(index, _)| index);
let last_encrypted_reasoning_index = items
.iter()
.enumerate()
.rev()
.find(|(_, item)| {
matches!(
item,
ResponseItem::Reasoning {
encrypted_content: Some(encrypted_content),
..
} if !encrypted_content.trim().is_empty()
)
})
.map(|(index, _)| index);
items
.iter()
.enumerate()
.filter_map(|(index, item)| {
build_arc_monitor_message_item(
item,
index,
last_tool_call_index,
last_encrypted_reasoning_index,
)
})
.collect()
}
fn build_arc_monitor_message_item(
item: &ResponseItem,
index: usize,
last_tool_call_index: Option<usize>,
last_encrypted_reasoning_index: Option<usize>,
) -> Option<ArcMonitorChatMessage> {
match item {
ResponseItem::Message { role, content, .. } if role == "user" => {
if is_contextual_user_message_content(content) {
None
} else {
content_items_to_text(content)
.map(|text| build_arc_monitor_text_message("user", "input_text", text))
}
}
ResponseItem::Message {
role,
content,
phase: Some(MessagePhase::FinalAnswer),
..
} if role == "assistant" => content_items_to_text(content)
.map(|text| build_arc_monitor_text_message("assistant", "output_text", text)),
ResponseItem::Message { .. } => None,
ResponseItem::Reasoning {
encrypted_content: Some(encrypted_content),
..
} if Some(index) == last_encrypted_reasoning_index
&& !encrypted_content.trim().is_empty() =>
{
Some(build_arc_monitor_message(
"assistant",
serde_json::json!([{
"type": "encrypted_reasoning",
"encrypted_content": encrypted_content,
}]),
))
}
ResponseItem::Reasoning { .. } => None,
ResponseItem::LocalShellCall { action, .. } if Some(index) == last_tool_call_index => {
Some(build_arc_monitor_message(
"assistant",
serde_json::json!([{
"type": "tool_call",
"tool_name": "shell",
"action": action,
}]),
))
}
ResponseItem::FunctionCall {
name, arguments, ..
} if Some(index) == last_tool_call_index => Some(build_arc_monitor_message(
"assistant",
serde_json::json!([{
"type": "tool_call",
"tool_name": name,
"arguments": arguments,
}]),
)),
ResponseItem::CustomToolCall { name, input, .. } if Some(index) == last_tool_call_index => {
Some(build_arc_monitor_message(
"assistant",
serde_json::json!([{
"type": "tool_call",
"tool_name": name,
"input": input,
}]),
))
}
ResponseItem::WebSearchCall { action, .. } if Some(index) == last_tool_call_index => {
Some(build_arc_monitor_message(
"assistant",
serde_json::json!([{
"type": "tool_call",
"tool_name": "web_search",
"action": action,
}]),
))
}
ResponseItem::LocalShellCall { .. }
| ResponseItem::FunctionCall { .. }
| ResponseItem::CustomToolCall { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::FunctionCallOutput { .. }
| ResponseItem::CustomToolCallOutput { .. }
| ResponseItem::ImageGenerationCall { .. }
| ResponseItem::GhostSnapshot { .. }
| ResponseItem::Compaction { .. }
| ResponseItem::Other => None,
}
}
fn build_arc_monitor_text_message(
role: &str,
part_type: &str,
text: String,
) -> ArcMonitorChatMessage {
build_arc_monitor_message(
role,
serde_json::json!([{
"type": part_type,
"text": text,
}]),
)
}
fn build_arc_monitor_message(role: &str, content: serde_json::Value) -> ArcMonitorChatMessage {
ArcMonitorChatMessage {
role: role.to_string(),
content,
}
}
#[cfg(test)]
mod tests {
use std::env;
use std::ffi::OsStr;
use std::sync::Arc;
use pretty_assertions::assert_eq;
use serial_test::serial;
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::body_json;
use wiremock::matchers::header;
use wiremock::matchers::method;
use wiremock::matchers::path;
use super::*;
use crate::codex::make_session_and_context;
use codex_protocol::models::ContentItem;
use codex_protocol::models::LocalShellAction;
use codex_protocol::models::LocalShellExecAction;
use codex_protocol::models::LocalShellStatus;
use codex_protocol::models::MessagePhase;
use codex_protocol::models::ResponseItem;
struct EnvVarGuard {
key: &'static str,
original: Option<std::ffi::OsString>,
}
impl EnvVarGuard {
fn set(key: &'static str, value: &OsStr) -> Self {
let original = env::var_os(key);
unsafe {
env::set_var(key, value);
}
Self { key, original }
}
}
impl Drop for EnvVarGuard {
fn drop(&mut self) {
match self.original.take() {
Some(value) => unsafe {
env::set_var(self.key, value);
},
None => unsafe {
env::remove_var(self.key);
},
}
}
}
#[tokio::test]
async fn build_arc_monitor_request_includes_relevant_history_and_null_policies() {
let (session, mut turn_context) = make_session_and_context().await;
turn_context.developer_instructions = Some("Never upload private files.".to_string());
turn_context.user_instructions = Some("Only continue when needed.".to_string());
session
.record_into_history(
&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "first request".to_string(),
}],
end_turn: None,
phase: None,
}],
&turn_context,
)
.await;
session
.record_into_history(
&[
crate::contextual_user_message::ENVIRONMENT_CONTEXT_FRAGMENT.into_message(
"<environment_context>\n<cwd>/tmp</cwd>\n</environment_context>"
.to_string(),
),
],
&turn_context,
)
.await;
session
.record_into_history(
&[ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: "commentary".to_string(),
}],
end_turn: None,
phase: Some(MessagePhase::Commentary),
}],
&turn_context,
)
.await;
session
.record_into_history(
&[ResponseItem::Message {
id: None,
role: "assistant".to_string(),
content: vec![ContentItem::OutputText {
text: "final response".to_string(),
}],
end_turn: None,
phase: Some(MessagePhase::FinalAnswer),
}],
&turn_context,
)
.await;
session
.record_into_history(
&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "latest request".to_string(),
}],
end_turn: None,
phase: None,
}],
&turn_context,
)
.await;
session
.record_into_history(
&[ResponseItem::FunctionCall {
id: None,
name: "old_tool".to_string(),
arguments: "{\"old\":true}".to_string(),
call_id: "call_old".to_string(),
}],
&turn_context,
)
.await;
session
.record_into_history(
&[ResponseItem::Reasoning {
id: "reasoning_old".to_string(),
summary: Vec::new(),
content: None,
encrypted_content: Some("encrypted-old".to_string()),
}],
&turn_context,
)
.await;
session
.record_into_history(
&[ResponseItem::LocalShellCall {
id: None,
call_id: Some("shell_call".to_string()),
status: LocalShellStatus::Completed,
action: LocalShellAction::Exec(LocalShellExecAction {
command: vec!["pwd".to_string()],
timeout_ms: Some(1000),
working_directory: Some("/tmp".to_string()),
env: None,
user: None,
}),
}],
&turn_context,
)
.await;
session
.record_into_history(
&[ResponseItem::Reasoning {
id: "reasoning_latest".to_string(),
summary: Vec::new(),
content: None,
encrypted_content: Some("encrypted-latest".to_string()),
}],
&turn_context,
)
.await;
let request = build_arc_monitor_request(
&session,
&turn_context,
serde_json::from_value(serde_json::json!({ "tool": "mcp_tool_call" }))
.expect("action should deserialize"),
)
.await;
assert_eq!(
request,
ArcMonitorRequest {
metadata: ArcMonitorMetadata {
codex_thread_id: session.conversation_id.to_string(),
codex_turn_id: turn_context.sub_id.clone(),
conversation_id: Some(session.conversation_id.to_string()),
protection_client_callsite: None,
},
messages: Some(vec![
ArcMonitorChatMessage {
role: "user".to_string(),
content: serde_json::json!([{
"type": "input_text",
"text": "first request",
}]),
},
ArcMonitorChatMessage {
role: "assistant".to_string(),
content: serde_json::json!([{
"type": "output_text",
"text": "final response",
}]),
},
ArcMonitorChatMessage {
role: "user".to_string(),
content: serde_json::json!([{
"type": "input_text",
"text": "latest request",
}]),
},
ArcMonitorChatMessage {
role: "assistant".to_string(),
content: serde_json::json!([{
"type": "tool_call",
"tool_name": "shell",
"action": {
"type": "exec",
"command": ["pwd"],
"timeout_ms": 1000,
"working_directory": "/tmp",
"env": null,
"user": null,
},
}]),
},
ArcMonitorChatMessage {
role: "assistant".to_string(),
content: serde_json::json!([{
"type": "encrypted_reasoning",
"encrypted_content": "encrypted-latest",
}]),
},
]),
input: None,
policies: Some(ArcMonitorPolicies {
user: None,
developer: None,
}),
action: serde_json::from_value(serde_json::json!({ "tool": "mcp_tool_call" }))
.expect("action should deserialize"),
}
);
}
#[tokio::test]
#[serial(arc_monitor_env)]
async fn monitor_action_posts_expected_arc_request() {
let server = MockServer::start().await;
let (session, mut turn_context) = make_session_and_context().await;
turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth(
crate::CodexAuth::create_dummy_chatgpt_auth_for_testing(),
));
turn_context.developer_instructions = Some("Developer policy".to_string());
turn_context.user_instructions = Some("User policy".to_string());
let mut config = (*turn_context.config).clone();
config.chatgpt_base_url = server.uri();
turn_context.config = Arc::new(config);
session
.record_into_history(
&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "please run the tool".to_string(),
}],
end_turn: None,
phase: None,
}],
&turn_context,
)
.await;
Mock::given(method("POST"))
.and(path("/api/codex/safety/arc"))
.and(header("authorization", "Bearer Access Token"))
.and(header("chatgpt-account-id", "account_id"))
.and(body_json(serde_json::json!({
"metadata": {
"codex_thread_id": session.conversation_id.to_string(),
"codex_turn_id": turn_context.sub_id.clone(),
"conversation_id": session.conversation_id.to_string(),
},
"messages": [{
"role": "user",
"content": [{
"type": "input_text",
"text": "please run the tool",
}],
}],
"policies": {
"developer": null,
"user": null,
},
"action": {
"tool": "mcp_tool_call",
},
})))
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
"outcome": "ask-user",
"short_reason": "needs confirmation",
"rationale": "tool call needs additional review",
"risk_score": 42,
"risk_level": "medium",
"evidence": [{
"message": "browser_navigate",
"why": "tool call needs additional review",
}],
})))
.expect(1)
.mount(&server)
.await;
let outcome = monitor_action(
&session,
&turn_context,
serde_json::json!({ "tool": "mcp_tool_call" }),
)
.await;
assert_eq!(
outcome,
ArcMonitorOutcome::AskUser("needs confirmation".to_string())
);
}
#[tokio::test]
#[serial(arc_monitor_env)]
async fn monitor_action_uses_env_url_and_token_overrides() {
let server = MockServer::start().await;
let _url_guard = EnvVarGuard::set(
CODEX_ARC_MONITOR_ENDPOINT_OVERRIDE,
OsStr::new(&format!("{}/override/arc", server.uri())),
);
let _token_guard = EnvVarGuard::set(CODEX_ARC_MONITOR_TOKEN, OsStr::new("override-token"));
let (session, turn_context) = make_session_and_context().await;
session
.record_into_history(
&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "please run the tool".to_string(),
}],
end_turn: None,
phase: None,
}],
&turn_context,
)
.await;
Mock::given(method("POST"))
.and(path("/override/arc"))
.and(header("authorization", "Bearer override-token"))
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
"outcome": "steer-model",
"short_reason": "needs approval",
"rationale": "high-risk action",
"risk_score": 96,
"risk_level": "critical",
"evidence": [{
"message": "browser_navigate",
"why": "high-risk action",
}],
})))
.expect(1)
.mount(&server)
.await;
let outcome = monitor_action(
&session,
&turn_context,
serde_json::json!({ "tool": "mcp_tool_call" }),
)
.await;
assert_eq!(
outcome,
ArcMonitorOutcome::SteerModel("high-risk action".to_string())
);
}
#[tokio::test]
#[serial(arc_monitor_env)]
async fn monitor_action_rejects_legacy_response_fields() {
let server = MockServer::start().await;
Mock::given(method("POST"))
.and(path("/api/codex/safety/arc"))
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
"outcome": "steer-model",
"reason": "legacy high-risk action",
"monitorRequestId": "arc_456",
})))
.expect(1)
.mount(&server)
.await;
let (session, mut turn_context) = make_session_and_context().await;
turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth(
crate::CodexAuth::create_dummy_chatgpt_auth_for_testing(),
));
let mut config = (*turn_context.config).clone();
config.chatgpt_base_url = server.uri();
turn_context.config = Arc::new(config);
session
.record_into_history(
&[ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: "please run the tool".to_string(),
}],
end_turn: None,
phase: None,
}],
&turn_context,
)
.await;
let outcome = monitor_action(
&session,
&turn_context,
serde_json::json!({ "tool": "mcp_tool_call" }),
)
.await;
assert_eq!(outcome, ArcMonitorOutcome::Ok);
}
}

View file

@ -731,8 +731,8 @@ fn truncate_guardian_action_value(value: Value) -> Value {
}
}
fn format_guardian_action_pretty(action: &GuardianApprovalRequest) -> String {
let mut value = match action {
pub(crate) fn guardian_approval_request_to_json(action: &GuardianApprovalRequest) -> Value {
match action {
GuardianApprovalRequest::Shell {
command,
cwd,
@ -871,7 +871,11 @@ fn format_guardian_action_pretty(action: &GuardianApprovalRequest) -> String {
}
action
}
};
}
}
fn format_guardian_action_pretty(action: &GuardianApprovalRequest) -> String {
let mut value = guardian_approval_request_to_json(action);
value = truncate_guardian_action_value(value);
serde_json::to_string_pretty(&value).unwrap_or_else(|_| "null".to_string())
}

View file

@ -171,6 +171,45 @@ fn format_guardian_action_pretty_truncates_large_string_fields() {
assert!(rendered.len() < patch.len());
}
#[test]
fn guardian_approval_request_to_json_renders_mcp_tool_call_shape() {
let action = GuardianApprovalRequest::McpToolCall {
server: "mcp_server".to_string(),
tool_name: "browser_navigate".to_string(),
arguments: Some(serde_json::json!({
"url": "https://example.com",
})),
connector_id: None,
connector_name: Some("Playwright".to_string()),
connector_description: None,
tool_title: Some("Navigate".to_string()),
tool_description: None,
annotations: Some(GuardianMcpAnnotations {
destructive_hint: Some(true),
open_world_hint: None,
read_only_hint: Some(false),
}),
};
assert_eq!(
guardian_approval_request_to_json(&action),
serde_json::json!({
"tool": "mcp_tool_call",
"server": "mcp_server",
"tool_name": "browser_navigate",
"arguments": {
"url": "https://example.com",
},
"connector_name": "Playwright",
"tool_title": "Navigate",
"annotations": {
"destructive_hint": true,
"read_only_hint": false,
},
})
);
}
#[test]
fn build_guardian_transcript_reserves_separate_budget_for_tool_evidence() {
let repeated = "signal ".repeat(8_000);

View file

@ -9,6 +9,7 @@ mod analytics_client;
pub mod api_bridge;
mod apply_patch;
mod apps;
mod arc_monitor;
pub mod auth;
mod client;
mod client_common;

View file

@ -11,6 +11,8 @@ use tracing::error;
use crate::analytics_client::AppInvocation;
use crate::analytics_client::InvocationType;
use crate::analytics_client::build_track_events_context;
use crate::arc_monitor::ArcMonitorOutcome;
use crate::arc_monitor::monitor_action;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::config::edit::ConfigEdit;
@ -20,6 +22,7 @@ use crate::connectors;
use crate::features::Feature;
use crate::guardian::GuardianApprovalRequest;
use crate::guardian::GuardianMcpAnnotations;
use crate::guardian::guardian_approval_request_to_json;
use crate::guardian::review_approval_request;
use crate::guardian::routes_approval_to_guardian;
use crate::mcp::CODEX_APPS_MCP_SERVER_NAME;
@ -197,6 +200,16 @@ pub(crate) async fn handle_mcp_tool_call(
)
.await
}
McpToolApprovalDecision::BlockedBySafetyMonitor(message) => {
notify_mcp_tool_call_skip(
sess.as_ref(),
turn_context.as_ref(),
&call_id,
invocation,
message,
)
.await
}
};
let status = if result.is_ok() { "ok" } else { "error" };
@ -348,13 +361,14 @@ async fn maybe_track_codex_app_used(
);
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq)]
enum McpToolApprovalDecision {
Accept,
AcceptForSession,
AcceptAndRemember,
Decline,
Cancel,
BlockedBySafetyMonitor(String),
}
struct McpToolApprovalMetadata {
@ -373,9 +387,9 @@ struct McpToolApprovalPromptOptions {
}
const MCP_TOOL_APPROVAL_QUESTION_ID_PREFIX: &str = "mcp_tool_call_approval";
const MCP_TOOL_APPROVAL_ACCEPT: &str = "Approve Once";
const MCP_TOOL_APPROVAL_ACCEPT_FOR_SESSION: &str = "Approve this session";
const MCP_TOOL_APPROVAL_ACCEPT_AND_REMEMBER: &str = "Always allow";
const MCP_TOOL_APPROVAL_ACCEPT: &str = "Allow";
const MCP_TOOL_APPROVAL_ACCEPT_FOR_SESSION: &str = "Allow for this session";
const MCP_TOOL_APPROVAL_ACCEPT_AND_REMEMBER: &str = "Allow and don't ask me again";
const MCP_TOOL_APPROVAL_CANCEL: &str = "Cancel";
const MCP_TOOL_APPROVAL_KIND_KEY: &str = "codex_approval_kind";
const MCP_TOOL_APPROVAL_KIND_MCP_TOOL_CALL: &str = "mcp_tool_call";
@ -418,15 +432,35 @@ async fn maybe_request_mcp_tool_approval(
metadata: Option<&McpToolApprovalMetadata>,
approval_mode: AppToolApproval,
) -> Option<McpToolApprovalDecision> {
if approval_mode == AppToolApproval::Approve {
return None;
}
let annotations = metadata.and_then(|metadata| metadata.annotations.as_ref());
let approval_required = annotations.is_some_and(requires_mcp_tool_approval);
let mut monitor_reason = None;
if approval_mode == AppToolApproval::Approve {
if !approval_required {
return None;
}
match maybe_monitor_auto_approved_mcp_tool_call(sess, turn_context, invocation, metadata)
.await
{
ArcMonitorOutcome::Ok => return None,
ArcMonitorOutcome::AskUser(reason) => {
monitor_reason = Some(reason);
}
ArcMonitorOutcome::SteerModel(reason) => {
return Some(McpToolApprovalDecision::BlockedBySafetyMonitor(
arc_monitor_interrupt_message(&reason),
));
}
}
}
if approval_mode == AppToolApproval::Auto {
if is_full_access_mode(turn_context) {
return None;
}
if !annotations.is_some_and(requires_mcp_tool_approval) {
if !approval_required {
return None;
}
}
@ -444,7 +478,7 @@ async fn maybe_request_mcp_tool_approval(
.features
.enabled(Feature::ToolCallMcpElicitation);
if routes_approval_to_guardian(turn_context) {
if monitor_reason.is_none() && routes_approval_to_guardian(turn_context) {
let decision = review_approval_request(
sess,
turn_context,
@ -456,7 +490,7 @@ async fn maybe_request_mcp_tool_approval(
apply_mcp_tool_approval_decision(
sess,
turn_context,
decision,
&decision,
session_approval_key,
persistent_approval_key,
)
@ -470,7 +504,7 @@ async fn maybe_request_mcp_tool_approval(
tool_call_mcp_elicitation_enabled,
);
let question_id = format!("{MCP_TOOL_APPROVAL_QUESTION_ID_PREFIX}_{call_id}");
let question = build_mcp_tool_approval_question(
let mut question = build_mcp_tool_approval_question(
question_id.clone(),
&invocation.server,
&invocation.tool,
@ -479,6 +513,8 @@ async fn maybe_request_mcp_tool_approval(
annotations,
prompt_options,
);
question.question =
mcp_tool_approval_question_text(question.question, monitor_reason.as_deref());
if tool_call_mcp_elicitation_enabled {
let request_id = rmcp::model::RequestId::String(
format!("{MCP_TOOL_APPROVAL_QUESTION_ID_PREFIX}_{call_id}").into(),
@ -501,7 +537,7 @@ async fn maybe_request_mcp_tool_approval(
apply_mcp_tool_approval_decision(
sess,
turn_context,
decision,
&decision,
session_approval_key,
persistent_approval_key,
)
@ -522,7 +558,7 @@ async fn maybe_request_mcp_tool_approval(
apply_mcp_tool_approval_decision(
sess,
turn_context,
decision,
&decision,
session_approval_key,
persistent_approval_key,
)
@ -530,6 +566,24 @@ async fn maybe_request_mcp_tool_approval(
Some(decision)
}
async fn maybe_monitor_auto_approved_mcp_tool_call(
sess: &Session,
turn_context: &TurnContext,
invocation: &McpInvocation,
metadata: Option<&McpToolApprovalMetadata>,
) -> ArcMonitorOutcome {
let action = prepare_arc_request_action(invocation, metadata);
monitor_action(sess, turn_context, action).await
}
fn prepare_arc_request_action(
invocation: &McpInvocation,
metadata: Option<&McpToolApprovalMetadata>,
) -> serde_json::Value {
let request = build_guardian_mcp_tool_review_request(invocation, metadata);
guardian_approval_request_to_json(&request)
}
fn session_mcp_tool_approval_key(
invocation: &McpInvocation,
metadata: Option<&McpToolApprovalMetadata>,
@ -732,7 +786,7 @@ fn build_mcp_tool_approval_question(
}
options.push(RequestUserInputQuestionOption {
label: MCP_TOOL_APPROVAL_CANCEL.to_string(),
description: "Cancel this tool call".to_string(),
description: "Cancel this tool call.".to_string(),
});
RequestUserInputQuestion {
@ -745,6 +799,24 @@ fn build_mcp_tool_approval_question(
}
}
fn mcp_tool_approval_question_text(question: String, monitor_reason: Option<&str>) -> String {
match monitor_reason.map(str::trim) {
Some(reason) if !reason.is_empty() => {
format!("Tool call needs your approval. Reason: {reason}")
}
_ => question,
}
}
fn arc_monitor_interrupt_message(reason: &str) -> String {
let reason = reason.trim();
if reason.is_empty() {
"Tool call was cancelled because of safety risks.".to_string()
} else {
format!("Tool call was cancelled because of safety risks: {reason}")
}
}
fn build_mcp_tool_approval_elicitation_request(
sess: &Session,
turn_context: &TurnContext,
@ -1001,7 +1073,7 @@ async fn remember_mcp_tool_approval(sess: &Session, key: McpToolApprovalKey) {
async fn apply_mcp_tool_approval_decision(
sess: &Session,
turn_context: &TurnContext,
decision: McpToolApprovalDecision,
decision: &McpToolApprovalDecision,
session_approval_key: Option<McpToolApprovalKey>,
persistent_approval_key: Option<McpToolApprovalKey>,
) {
@ -1020,7 +1092,8 @@ async fn apply_mcp_tool_approval_decision(
}
McpToolApprovalDecision::Accept
| McpToolApprovalDecision::Decline
| McpToolApprovalDecision::Cancel => {}
| McpToolApprovalDecision::Cancel
| McpToolApprovalDecision::BlockedBySafetyMonitor(_) => {}
}
}
@ -1117,6 +1190,7 @@ mod tests {
use pretty_assertions::assert_eq;
use serde::Deserialize;
use std::collections::HashMap;
use std::sync::Arc;
use tempfile::tempdir;
fn annotations(
@ -1196,6 +1270,17 @@ mod tests {
);
}
#[test]
fn approval_question_text_prepends_safety_reason() {
assert_eq!(
mcp_tool_approval_question_text(
"Allow this action?".to_string(),
Some("This tool may contact an external system."),
),
"Tool call needs your approval. Reason: This tool may contact an external system."
);
}
#[test]
fn custom_mcp_tool_question_mentions_server_name() {
let question = build_mcp_tool_approval_question(
@ -1581,6 +1666,42 @@ mod tests {
);
}
#[test]
fn prepare_arc_request_action_serializes_mcp_tool_call_shape() {
let invocation = McpInvocation {
server: CODEX_APPS_MCP_SERVER_NAME.to_string(),
tool: "browser_navigate".to_string(),
arguments: Some(serde_json::json!({
"url": "https://example.com",
})),
};
let action = prepare_arc_request_action(
&invocation,
Some(&approval_metadata(
None,
Some("Playwright"),
None,
Some("Navigate"),
None,
)),
);
assert_eq!(
action,
serde_json::json!({
"tool": "mcp_tool_call",
"server": CODEX_APPS_MCP_SERVER_NAME,
"tool_name": "browser_navigate",
"arguments": {
"url": "https://example.com",
},
"connector_name": "Playwright",
"tool_title": "Navigate",
})
);
}
#[test]
fn guardian_review_decision_maps_to_mcp_tool_decision() {
assert_eq!(
@ -1805,4 +1926,104 @@ mod tests {
);
assert_eq!(mcp_tool_approval_is_remembered(&session, &key).await, true);
}
#[tokio::test]
async fn approve_mode_skips_when_annotations_do_not_require_approval() {
let (session, turn_context) = make_session_and_context().await;
let session = Arc::new(session);
let turn_context = Arc::new(turn_context);
let invocation = McpInvocation {
server: "custom_server".to_string(),
tool: "read_only_tool".to_string(),
arguments: None,
};
let metadata = McpToolApprovalMetadata {
annotations: Some(annotations(Some(true), None, None)),
connector_id: None,
connector_name: None,
connector_description: None,
tool_title: Some("Read Only Tool".to_string()),
tool_description: None,
};
let decision = maybe_request_mcp_tool_approval(
&session,
&turn_context,
"call-1",
&invocation,
Some(&metadata),
AppToolApproval::Approve,
)
.await;
assert_eq!(decision, None);
}
#[tokio::test]
async fn approve_mode_blocks_when_arc_returns_interrupt_for_model() {
use wiremock::Mock;
use wiremock::MockServer;
use wiremock::ResponseTemplate;
use wiremock::matchers::method;
use wiremock::matchers::path;
let server = MockServer::start().await;
Mock::given(method("POST"))
.and(path("/api/codex/safety/arc"))
.respond_with(ResponseTemplate::new(200).set_body_json(serde_json::json!({
"outcome": "steer-model",
"short_reason": "needs approval",
"rationale": "high-risk action",
"risk_score": 96,
"risk_level": "critical",
"evidence": [{
"message": "dangerous_tool",
"why": "high-risk action",
}],
})))
.expect(1)
.mount(&server)
.await;
let (session, mut turn_context) = make_session_and_context().await;
turn_context.auth_manager = Some(crate::test_support::auth_manager_from_auth(
crate::CodexAuth::create_dummy_chatgpt_auth_for_testing(),
));
let mut config = (*turn_context.config).clone();
config.chatgpt_base_url = server.uri();
turn_context.config = Arc::new(config);
let session = Arc::new(session);
let turn_context = Arc::new(turn_context);
let invocation = McpInvocation {
server: CODEX_APPS_MCP_SERVER_NAME.to_string(),
tool: "dangerous_tool".to_string(),
arguments: Some(serde_json::json!({ "id": 1 })),
};
let metadata = McpToolApprovalMetadata {
annotations: Some(annotations(Some(false), Some(true), Some(true))),
connector_id: Some("calendar".to_string()),
connector_name: Some("Calendar".to_string()),
connector_description: Some("Manage events".to_string()),
tool_title: Some("Dangerous Tool".to_string()),
tool_description: Some("Performs a risky action.".to_string()),
};
let decision = maybe_request_mcp_tool_approval(
&session,
&turn_context,
"call-2",
&invocation,
Some(&metadata),
AppToolApproval::Approve,
)
.await;
assert_eq!(
decision,
Some(McpToolApprovalDecision::BlockedBySafetyMonitor(
"Tool call was cancelled because of safety risks: high-risk action".to_string(),
))
);
}
}

View file

@ -229,7 +229,7 @@ pub(crate) trait Approvable<Req> {
// In most cases (shell, unified_exec), a request will have a single approval key.
//
// However, apply_patch needs session "approve once, don't ask again" semantics that
// However, apply_patch needs session "Allow, don't ask again" semantics that
// apply to multiple atomic targets (e.g., apply_patch approves per file path). Returning
// a list of keys lets the runtime treat the request as approved-for-session only if
// *all* keys are already approved, while still caching approvals per-key so future

View file

@ -190,7 +190,7 @@ impl McpServerElicitationFormRequest {
|| (is_tool_approval && is_empty_object_schema)
{
let mut options = vec![McpServerElicitationOption {
label: "Approve Once".to_string(),
label: "Allow".to_string(),
description: Some("Run the tool and continue.".to_string()),
value: Value::String(APPROVAL_ACCEPT_ONCE_VALUE.to_string()),
}];
@ -201,7 +201,7 @@ impl McpServerElicitationFormRequest {
)
{
options.push(McpServerElicitationOption {
label: "Approve this session".to_string(),
label: "Allow for this session".to_string(),
description: Some(
"Run the tool and remember this choice for this session.".to_string(),
),
@ -1601,7 +1601,7 @@ mod tests {
input: McpServerElicitationFieldInput::Select {
options: vec![
McpServerElicitationOption {
label: "Approve Once".to_string(),
label: "Allow".to_string(),
description: Some("Run the tool and continue.".to_string()),
value: Value::String(APPROVAL_ACCEPT_ONCE_VALUE.to_string()),
},
@ -1654,7 +1654,7 @@ mod tests {
input: McpServerElicitationFieldInput::Select {
options: vec![
McpServerElicitationOption {
label: "Approve Once".to_string(),
label: "Allow".to_string(),
description: Some("Run the tool and continue.".to_string()),
value: Value::String(APPROVAL_ACCEPT_ONCE_VALUE.to_string()),
},

View file

@ -5,10 +5,10 @@ expression: "render_snapshot(&overlay, Rect::new(0, 0, 120, 16))"
Field 1/1
Allow this request?
1. Approve Once Run the tool and continue.
2. Approve this session Run the tool and remember this choice for this session.
3. Always allow Run the tool and remember this choice for future tool calls.
4. Cancel Cancel this tool call
1. Allow Run the tool and continue.
2. Allow for this session Run the tool and remember this choice for this session.
3. Always allow Run the tool and remember this choice for future tool calls.
4. Cancel Cancel this tool call

View file

@ -5,8 +5,8 @@ expression: "render_snapshot(&overlay, Rect::new(0, 0, 120, 16))"
Field 1/1
Allow this request?
1. Approve Once Run the tool and continue.
2. Cancel Cancel this tool call
1. Allow Run the tool and continue.
2. Cancel Cancel this tool call