feat(core): emit turn metric for network proxy state (#14250)
## Summary
- Add a per-turn `codex.turn.network_proxy` metric constant.
- Emit the metric at turn completion, using the live enabled state of the managed proxy.
- Add focused tests for active and inactive tag emission.
This commit is contained in:
parent
8f8a0f55ce
commit
52a3bde6cc
2 changed files with 151 additions and 1 deletions
|
|
@ -33,7 +33,9 @@ use crate::protocol::TurnCompleteEvent;
|
|||
use crate::state::ActiveTurn;
|
||||
use crate::state::RunningTask;
|
||||
use crate::state::TaskKind;
|
||||
use codex_otel::SessionTelemetry;
|
||||
use codex_otel::metrics::names::TURN_E2E_DURATION_METRIC;
|
||||
use codex_otel::metrics::names::TURN_NETWORK_PROXY_METRIC;
|
||||
use codex_otel::metrics::names::TURN_TOKEN_USAGE_METRIC;
|
||||
use codex_otel::metrics::names::TURN_TOOL_CALL_METRIC;
|
||||
use codex_protocol::items::TurnItem;
|
||||
|
|
@ -56,6 +58,19 @@ pub(crate) use user_shell::execute_user_shell_command;
|
|||
const GRACEFULL_INTERRUPTION_TIMEOUT_MS: u64 = 100;
|
||||
const TURN_ABORTED_INTERRUPTED_GUIDANCE: &str = "The user interrupted the previous turn on purpose. Any running unified exec processes were terminated. If any tools/commands were aborted, they may have partially executed; verify current state before retrying.";
|
||||
|
||||
fn emit_turn_network_proxy_metric(
|
||||
session_telemetry: &SessionTelemetry,
|
||||
network_proxy_active: bool,
|
||||
tmp_mem: (&str, &str),
|
||||
) {
|
||||
let active = if network_proxy_active {
|
||||
"true"
|
||||
} else {
|
||||
"false"
|
||||
};
|
||||
session_telemetry.counter(TURN_NETWORK_PROXY_METRIC, 1, &[("active", active), tmp_mem]);
|
||||
}
|
||||
|
||||
/// Thin wrapper that exposes the parts of [`Session`] task runners need.
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct SessionTaskContext {
|
||||
|
|
@ -280,6 +295,25 @@ impl Session {
|
|||
"false"
|
||||
},
|
||||
);
|
||||
let network_proxy_active = match self.services.network_proxy.as_ref() {
|
||||
Some(started_network_proxy) => {
|
||||
match started_network_proxy.proxy().current_cfg().await {
|
||||
Ok(config) => config.network.enabled,
|
||||
Err(err) => {
|
||||
warn!(
|
||||
"failed to read managed network proxy state for turn metrics: {err:#}"
|
||||
);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
None => false,
|
||||
};
|
||||
emit_turn_network_proxy_metric(
|
||||
&self.services.session_telemetry,
|
||||
network_proxy_active,
|
||||
tmp_mem,
|
||||
);
|
||||
self.services.session_telemetry.histogram(
|
||||
TURN_TOOL_CALL_METRIC,
|
||||
i64::try_from(turn_tool_calls).unwrap_or(i64::MAX),
|
||||
|
|
@ -420,4 +454,119 @@ impl Session {
|
|||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {}
|
||||
mod tests {
|
||||
use super::emit_turn_network_proxy_metric;
|
||||
use codex_otel::SessionTelemetry;
|
||||
use codex_otel::metrics::MetricsClient;
|
||||
use codex_otel::metrics::MetricsConfig;
|
||||
use codex_otel::metrics::names::TURN_NETWORK_PROXY_METRIC;
|
||||
use codex_protocol::ThreadId;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use opentelemetry::KeyValue;
|
||||
use opentelemetry_sdk::metrics::InMemoryMetricExporter;
|
||||
use opentelemetry_sdk::metrics::data::AggregatedMetrics;
|
||||
use opentelemetry_sdk::metrics::data::Metric;
|
||||
use opentelemetry_sdk::metrics::data::MetricData;
|
||||
use opentelemetry_sdk::metrics::data::ResourceMetrics;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
fn test_session_telemetry() -> SessionTelemetry {
|
||||
let exporter = InMemoryMetricExporter::default();
|
||||
let metrics = MetricsClient::new(
|
||||
MetricsConfig::in_memory("test", "codex-core", env!("CARGO_PKG_VERSION"), exporter)
|
||||
.with_runtime_reader(),
|
||||
)
|
||||
.expect("in-memory metrics client");
|
||||
SessionTelemetry::new(
|
||||
ThreadId::new(),
|
||||
"gpt-5.1",
|
||||
"gpt-5.1",
|
||||
None,
|
||||
None,
|
||||
None,
|
||||
"test_originator".to_string(),
|
||||
false,
|
||||
"tty".to_string(),
|
||||
SessionSource::Cli,
|
||||
)
|
||||
.with_metrics_without_metadata_tags(metrics)
|
||||
}
|
||||
|
||||
/// Returns the first metric called `name` across every scope in
/// `resource_metrics`.
///
/// # Panics
///
/// Panics when no metric with that name is present.
fn find_metric<'a>(resource_metrics: &'a ResourceMetrics, name: &str) -> &'a Metric {
    resource_metrics
        .scope_metrics()
        .flat_map(|scope| scope.metrics())
        .find(|candidate| candidate.name() == name)
        .unwrap_or_else(|| panic!("metric {name} missing"))
}
|
||||
|
||||
fn attributes_to_map<'a>(
|
||||
attributes: impl Iterator<Item = &'a KeyValue>,
|
||||
) -> BTreeMap<String, String> {
|
||||
attributes
|
||||
.map(|kv| (kv.key.as_str().to_string(), kv.value.as_str().to_string()))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn metric_point(resource_metrics: &ResourceMetrics) -> (BTreeMap<String, String>, u64) {
|
||||
let metric = find_metric(resource_metrics, TURN_NETWORK_PROXY_METRIC);
|
||||
match metric.data() {
|
||||
AggregatedMetrics::U64(data) => match data {
|
||||
MetricData::Sum(sum) => {
|
||||
let points: Vec<_> = sum.data_points().collect();
|
||||
assert_eq!(points.len(), 1);
|
||||
let point = points[0];
|
||||
(attributes_to_map(point.attributes()), point.value())
|
||||
}
|
||||
_ => panic!("unexpected counter aggregation"),
|
||||
},
|
||||
_ => panic!("unexpected counter data type"),
|
||||
}
|
||||
}
|
||||
|
||||
/// An active proxy yields one counter increment tagged `active = "true"`,
/// alongside the forwarded `tmp_mem_enabled` tag.
#[test]
fn emit_turn_network_proxy_metric_records_active_turn() {
    let telemetry = test_session_telemetry();
    emit_turn_network_proxy_metric(&telemetry, true, ("tmp_mem_enabled", "true"));

    let metrics_snapshot = telemetry
        .snapshot_metrics()
        .expect("runtime metrics snapshot");
    let (tags, count) = metric_point(&metrics_snapshot);

    let expected_tags: BTreeMap<String, String> =
        [("active", "true"), ("tmp_mem_enabled", "true")]
            .into_iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();

    assert_eq!(count, 1);
    assert_eq!(tags, expected_tags);
}
|
||||
|
||||
/// An inactive proxy yields one counter increment tagged `active = "false"`,
/// alongside the forwarded `tmp_mem_enabled` tag.
#[test]
fn emit_turn_network_proxy_metric_records_inactive_turn() {
    let telemetry = test_session_telemetry();
    emit_turn_network_proxy_metric(&telemetry, false, ("tmp_mem_enabled", "false"));

    let metrics_snapshot = telemetry
        .snapshot_metrics()
        .expect("runtime metrics snapshot");
    let (tags, count) = metric_point(&metrics_snapshot);

    let expected_tags: BTreeMap<String, String> =
        [("active", "false"), ("tmp_mem_enabled", "false")]
            .into_iter()
            .map(|(key, value)| (key.to_string(), value.to_string()))
            .collect();

    assert_eq!(count, 1);
    assert_eq!(tags, expected_tags);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ pub const RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC: &str =
|
|||
pub const TURN_E2E_DURATION_METRIC: &str = "codex.turn.e2e_duration_ms";
|
||||
pub const TURN_TTFT_DURATION_METRIC: &str = "codex.turn.ttft.duration_ms";
|
||||
pub const TURN_TTFM_DURATION_METRIC: &str = "codex.turn.ttfm.duration_ms";
|
||||
/// Name of the per-turn counter recording whether the managed network proxy
/// was enabled; emitted with an `active` tag of `"true"` or `"false"`.
pub const TURN_NETWORK_PROXY_METRIC: &str = "codex.turn.network_proxy";
|
||||
pub const TURN_TOOL_CALL_METRIC: &str = "codex.turn.tool.call";
|
||||
pub const TURN_TOKEN_USAGE_METRIC: &str = "codex.turn.token_usage";
|
||||
pub const THREAD_STARTED_METRIC: &str = "codex.thread.started";
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue