add sandbox policy and sandbox name to codex.tool.call metrics (#10711)

This gives visibility into the success rate of the Windows sandbox implementations relative to the sandboxes used on other platforms.
2026-02-05 11:42:12 -08:00 · 2026-02-05 11:42:12 -08:00 · 901d5b8fd6
commit 901d5b8fd6
parent 4df9f2020b
4 changed files with 75 additions and 17 deletions
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@ -128,6 +128,17 @@ pub enum SandboxType {
    WindowsRestrictedToken,
 }

+impl SandboxType {
+    pub(crate) fn as_metric_tag(self) -> &'static str {
+        match self {
+            SandboxType::None => "none",
+            SandboxType::MacosSeatbelt => "seatbelt",
+            SandboxType::LinuxSeccomp => "seccomp",
+            SandboxType::WindowsRestrictedToken => "windows_sandbox",
+        }
+    }
+}
+
 #[derive(Clone)]
 pub struct StdoutStream {
    pub sub_id: String,
--- a/codex-rs/core/src/tools/registry.rs
+++ b/codex-rs/core/src/tools/registry.rs
@ -3,11 +3,15 @@ use std::sync::Arc;
 use std::time::Duration;

 use crate::client_common::tools::ToolSpec;
+use crate::exec::SandboxType;
 use crate::function_tool::FunctionCallError;
+use crate::protocol::SandboxPolicy;
+use crate::safety::get_platform_sandbox;
 use crate::tools::context::ToolInvocation;
 use crate::tools::context::ToolOutput;
 use crate::tools::context::ToolPayload;
 use async_trait::async_trait;
+use codex_protocol::config_types::WindowsSandboxLevel;
 use codex_protocol::models::ResponseInputItem;
 use codex_utils_readiness::Readiness;
 use tracing::warn;
@ -73,19 +77,33 @@ impl ToolRegistry {
        let otel = invocation.turn.otel_manager.clone();
        let payload_for_response = invocation.payload.clone();
        let log_payload = payload_for_response.log_payload();
+        let metric_tags = [
+            (
+                "sandbox",
+                sandbox_tag(
+                    &invocation.turn.sandbox_policy,
+                    invocation.turn.windows_sandbox_level,
+                ),
+            ),
+            (
+                "sandbox_policy",
+                sandbox_policy_tag(&invocation.turn.sandbox_policy),
+            ),
+        ];

        let handler = match self.handler(tool_name.as_ref()) {
            Some(handler) => handler,
            None => {
                let message =
                    unsupported_tool_call_message(&invocation.payload, tool_name.as_ref());
-                otel.tool_result(
+                otel.tool_result_with_tags(
                    tool_name.as_ref(),
                    &call_id_owned,
                    log_payload.as_ref(),
                    Duration::ZERO,
                    false,
                    &message,
+                    &metric_tags,
                );
                return Err(FunctionCallError::RespondToModel(message));
            }
@ -93,13 +111,14 @@ impl ToolRegistry {

        if !handler.matches_kind(&invocation.payload) {
            let message = format!("tool {tool_name} invoked with incompatible payload");
-            otel.tool_result(
+            otel.tool_result_with_tags(
                tool_name.as_ref(),
                &call_id_owned,
                log_payload.as_ref(),
                Duration::ZERO,
                false,
                &message,
+                &metric_tags,
            );
            return Err(FunctionCallError::Fatal(message));
        }
@ -107,10 +126,11 @@ impl ToolRegistry {
        let output_cell = tokio::sync::Mutex::new(None);

        let result = otel
-            .log_tool_result(
+            .log_tool_result_with_tags(
                tool_name.as_ref(),
                &call_id_owned,
                log_payload.as_ref(),
+                &metric_tags,
                || {
                    let handler = handler.clone();
                    let output_cell = &output_cell;
@ -231,3 +251,29 @@ fn unsupported_tool_call_message(payload: &ToolPayload, tool_name: &str) -> Stri
        _ => format!("unsupported call: {tool_name}"),
    }
 }
+
+fn sandbox_tag(policy: &SandboxPolicy, windows_sandbox_level: WindowsSandboxLevel) -> &'static str {
+    if matches!(policy, SandboxPolicy::DangerFullAccess) {
+        return "none";
+    }
+    if matches!(policy, SandboxPolicy::ExternalSandbox { .. }) {
+        return "external";
+    }
+    if cfg!(target_os = "windows") && matches!(windows_sandbox_level, WindowsSandboxLevel::Elevated)
+    {
+        return "windows_elevated";
+    }
+
+    get_platform_sandbox(windows_sandbox_level != WindowsSandboxLevel::Disabled)
+        .map(SandboxType::as_metric_tag)
+        .unwrap_or("none")
+}
+
+fn sandbox_policy_tag(policy: &SandboxPolicy) -> &'static str {
+    match policy {
+        SandboxPolicy::ReadOnly => "read-only",
+        SandboxPolicy::WorkspaceWrite { .. } => "workspace-write",
+        SandboxPolicy::DangerFullAccess => "danger-full-access",
+        SandboxPolicy::ExternalSandbox { .. } => "external-sandbox",
+    }
+}
--- a/codex-rs/otel/src/traces/otel_manager.rs
+++ b/codex-rs/otel/src/traces/otel_manager.rs
@ -566,11 +566,12 @@ impl OtelManager {
        );
    }

-    pub async fn log_tool_result<F, Fut, E>(
+    pub async fn log_tool_result_with_tags<F, Fut, E>(
        &self,
        tool_name: &str,
        call_id: &str,
        arguments: &str,
+        extra_tags: &[(&str, &str)],
        f: F,
    ) -> Result<(String, bool), E>
    where
@ -587,13 +588,14 @@ impl OtelManager {
            Err(error) => (Cow::Owned(error.to_string()), false),
        };

-        self.tool_result(
+        self.tool_result_with_tags(
            tool_name,
            call_id,
            arguments,
            duration,
            success,
            output.as_ref(),
+            extra_tags,
        );

        result
@ -619,7 +621,8 @@ impl OtelManager {
        );
    }

-    pub fn tool_result(
+    #[allow(clippy::too_many_arguments)]
+    pub fn tool_result_with_tags(
        &self,
        tool_name: &str,
        call_id: &str,
@ -627,18 +630,15 @@ impl OtelManager {
        duration: Duration,
        success: bool,
        output: &str,
+        extra_tags: &[(&str, &str)],
    ) {
        let success_str = if success { "true" } else { "false" };
-        self.counter(
-            TOOL_CALL_COUNT_METRIC,
-            1,
-            &[("tool", tool_name), ("success", success_str)],
-        );
-        self.record_duration(
-            TOOL_CALL_DURATION_METRIC,
-            duration,
-            &[("tool", tool_name), ("success", success_str)],
-        );
+        let mut tags = Vec::with_capacity(2 + extra_tags.len());
+        tags.push(("tool", tool_name));
+        tags.push(("success", success_str));
+        tags.extend_from_slice(extra_tags);
+        self.counter(TOOL_CALL_COUNT_METRIC, 1, &tags);
+        self.record_duration(TOOL_CALL_DURATION_METRIC, duration, &tags);
        tracing::event!(
            tracing::Level::INFO,
            event.name = "codex.tool_result",
--- a/codex-rs/otel/tests/suite/runtime_summary.rs
+++ b/codex-rs/otel/tests/suite/runtime_summary.rs
@ -35,13 +35,14 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<(

    manager.reset_runtime_metrics();

-    manager.tool_result(
+    manager.tool_result_with_tags(
        "shell",
        "call-1",
        "{\"cmd\":\"echo\"}",
        Duration::from_millis(250),
        true,
        "ok",
+        &[],
    );
    manager.record_api_request(1, Some(200), None, Duration::from_millis(300));
    manager.record_websocket_request(Duration::from_millis(400), None);