From 901d5b8fd60afb6180cbf0d61ceb76cd0df0a078 Mon Sep 17 00:00:00 2001 From: iceweasel-oai Date: Thu, 5 Feb 2026 11:42:12 -0800 Subject: [PATCH] add sandbox policy and sandbox name to codex.tool.call metrics (#10711) This will give visibility into the success rate of the Windows sandbox implementations relative to other platforms. --- codex-rs/core/src/exec.rs | 11 +++++ codex-rs/core/src/tools/registry.rs | 52 ++++++++++++++++++-- codex-rs/otel/src/traces/otel_manager.rs | 26 +++++----- codex-rs/otel/tests/suite/runtime_summary.rs | 3 +- 4 files changed, 75 insertions(+), 17 deletions(-) diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs index 3ac8d1956..d4d508ced 100644 --- a/codex-rs/core/src/exec.rs +++ b/codex-rs/core/src/exec.rs @@ -128,6 +128,17 @@ pub enum SandboxType { WindowsRestrictedToken, } +impl SandboxType { + pub(crate) fn as_metric_tag(self) -> &'static str { + match self { + SandboxType::None => "none", + SandboxType::MacosSeatbelt => "seatbelt", + SandboxType::LinuxSeccomp => "seccomp", + SandboxType::WindowsRestrictedToken => "windows_sandbox", + } + } +} + #[derive(Clone)] pub struct StdoutStream { pub sub_id: String, diff --git a/codex-rs/core/src/tools/registry.rs b/codex-rs/core/src/tools/registry.rs index f67db6394..3de041c57 100644 --- a/codex-rs/core/src/tools/registry.rs +++ b/codex-rs/core/src/tools/registry.rs @@ -3,11 +3,15 @@ use std::sync::Arc; use std::time::Duration; use crate::client_common::tools::ToolSpec; +use crate::exec::SandboxType; use crate::function_tool::FunctionCallError; +use crate::protocol::SandboxPolicy; +use crate::safety::get_platform_sandbox; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; use async_trait::async_trait; +use codex_protocol::config_types::WindowsSandboxLevel; use codex_protocol::models::ResponseInputItem; use codex_utils_readiness::Readiness; use tracing::warn; @@ -73,19 +77,33 @@ impl 
ToolRegistry { let otel = invocation.turn.otel_manager.clone(); let payload_for_response = invocation.payload.clone(); let log_payload = payload_for_response.log_payload(); + let metric_tags = [ + ( + "sandbox", + sandbox_tag( + &invocation.turn.sandbox_policy, + invocation.turn.windows_sandbox_level, + ), + ), + ( + "sandbox_policy", + sandbox_policy_tag(&invocation.turn.sandbox_policy), + ), + ]; let handler = match self.handler(tool_name.as_ref()) { Some(handler) => handler, None => { let message = unsupported_tool_call_message(&invocation.payload, tool_name.as_ref()); - otel.tool_result( + otel.tool_result_with_tags( tool_name.as_ref(), &call_id_owned, log_payload.as_ref(), Duration::ZERO, false, &message, + &metric_tags, ); return Err(FunctionCallError::RespondToModel(message)); } @@ -93,13 +111,14 @@ impl ToolRegistry { if !handler.matches_kind(&invocation.payload) { let message = format!("tool {tool_name} invoked with incompatible payload"); - otel.tool_result( + otel.tool_result_with_tags( tool_name.as_ref(), &call_id_owned, log_payload.as_ref(), Duration::ZERO, false, &message, + &metric_tags, ); return Err(FunctionCallError::Fatal(message)); } @@ -107,10 +126,11 @@ impl ToolRegistry { let output_cell = tokio::sync::Mutex::new(None); let result = otel - .log_tool_result( + .log_tool_result_with_tags( tool_name.as_ref(), &call_id_owned, log_payload.as_ref(), + &metric_tags, || { let handler = handler.clone(); let output_cell = &output_cell; @@ -231,3 +251,29 @@ fn unsupported_tool_call_message(payload: &ToolPayload, tool_name: &str) -> Stri _ => format!("unsupported call: {tool_name}"), } } + +fn sandbox_tag(policy: &SandboxPolicy, windows_sandbox_level: WindowsSandboxLevel) -> &'static str { + if matches!(policy, SandboxPolicy::DangerFullAccess) { + return "none"; + } + if matches!(policy, SandboxPolicy::ExternalSandbox { .. 
}) { + return "external"; + } + if cfg!(target_os = "windows") && matches!(windows_sandbox_level, WindowsSandboxLevel::Elevated) + { + return "windows_elevated"; + } + + get_platform_sandbox(windows_sandbox_level != WindowsSandboxLevel::Disabled) + .map(SandboxType::as_metric_tag) + .unwrap_or("none") +} + +fn sandbox_policy_tag(policy: &SandboxPolicy) -> &'static str { + match policy { + SandboxPolicy::ReadOnly => "read-only", + SandboxPolicy::WorkspaceWrite { .. } => "workspace-write", + SandboxPolicy::DangerFullAccess => "danger-full-access", + SandboxPolicy::ExternalSandbox { .. } => "external-sandbox", + } +} diff --git a/codex-rs/otel/src/traces/otel_manager.rs b/codex-rs/otel/src/traces/otel_manager.rs index b174dabe6..aa3006da0 100644 --- a/codex-rs/otel/src/traces/otel_manager.rs +++ b/codex-rs/otel/src/traces/otel_manager.rs @@ -566,11 +566,12 @@ impl OtelManager { ); } - pub async fn log_tool_result( + pub async fn log_tool_result_with_tags( &self, tool_name: &str, call_id: &str, arguments: &str, + extra_tags: &[(&str, &str)], f: F, ) -> Result<(String, bool), E> where @@ -587,13 +588,14 @@ impl OtelManager { Err(error) => (Cow::Owned(error.to_string()), false), }; - self.tool_result( + self.tool_result_with_tags( tool_name, call_id, arguments, duration, success, output.as_ref(), + extra_tags, ); result @@ -619,7 +621,8 @@ impl OtelManager { ); } - pub fn tool_result( + #[allow(clippy::too_many_arguments)] + pub fn tool_result_with_tags( &self, tool_name: &str, call_id: &str, @@ -627,18 +630,15 @@ impl OtelManager { duration: Duration, success: bool, output: &str, + extra_tags: &[(&str, &str)], ) { let success_str = if success { "true" } else { "false" }; - self.counter( - TOOL_CALL_COUNT_METRIC, - 1, - &[("tool", tool_name), ("success", success_str)], - ); - self.record_duration( - TOOL_CALL_DURATION_METRIC, - duration, - &[("tool", tool_name), ("success", success_str)], - ); + let mut tags = Vec::with_capacity(2 + extra_tags.len()); + 
tags.push(("tool", tool_name)); + tags.push(("success", success_str)); + tags.extend_from_slice(extra_tags); + self.counter(TOOL_CALL_COUNT_METRIC, 1, &tags); + self.record_duration(TOOL_CALL_DURATION_METRIC, duration, &tags); tracing::event!( tracing::Level::INFO, event.name = "codex.tool_result", diff --git a/codex-rs/otel/tests/suite/runtime_summary.rs b/codex-rs/otel/tests/suite/runtime_summary.rs index 71d9a140c..6ef2927f4 100644 --- a/codex-rs/otel/tests/suite/runtime_summary.rs +++ b/codex-rs/otel/tests/suite/runtime_summary.rs @@ -35,13 +35,14 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<( manager.reset_runtime_metrics(); - manager.tool_result( + manager.tool_result_with_tags( "shell", "call-1", "{\"cmd\":\"echo\"}", Duration::from_millis(250), true, "ok", + &[], ); manager.record_api_request(1, Some(200), None, Duration::from_millis(300)); manager.record_websocket_request(Duration::from_millis(400), None);