core-agent-ide/codex-rs/otel/src/lib.rs
2026-02-09 14:29:19 -08:00

238 lines
7.2 KiB
Rust

pub mod config;
pub mod metrics;
pub mod otel_provider;
pub mod traces;
mod otlp;
use crate::metrics::MetricsClient;
use crate::metrics::MetricsConfig;
use crate::metrics::MetricsError;
use crate::metrics::Result as MetricsResult;
pub use crate::metrics::timer::Timer;
use crate::metrics::validation::validate_tag_key;
use crate::metrics::validation::validate_tag_value;
use crate::otel_provider::OtelProvider;
use codex_protocol::ThreadId;
pub use codex_utils_string::sanitize_metric_tag_value;
use opentelemetry_sdk::metrics::data::ResourceMetrics;
use serde::Serialize;
use std::time::Duration;
use strum_macros::Display;
use tracing::debug;
pub use crate::metrics::runtime_metrics::RuntimeMetricTotals;
pub use crate::metrics::runtime_metrics::RuntimeMetricsSummary;
#[derive(Debug, Clone, Serialize, Display)]
#[serde(rename_all = "snake_case")]
pub enum ToolDecisionSource {
Config,
User,
}
/// Maps to core AuthMode to avoid a circular dependency on codex-core.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Display)]
pub enum TelemetryAuthMode {
ApiKey,
Chatgpt,
}
#[derive(Debug, Clone)]
pub struct OtelEventMetadata {
pub(crate) conversation_id: ThreadId,
pub(crate) auth_mode: Option<String>,
pub(crate) account_id: Option<String>,
pub(crate) account_email: Option<String>,
pub(crate) originator: String,
pub(crate) session_source: String,
pub(crate) model: String,
pub(crate) slug: String,
pub(crate) log_user_prompts: bool,
pub(crate) app_version: &'static str,
pub(crate) terminal_type: String,
}
#[derive(Debug, Clone)]
pub struct OtelManager {
pub(crate) metadata: OtelEventMetadata,
pub(crate) metrics: Option<MetricsClient>,
pub(crate) metrics_use_metadata_tags: bool,
}
impl OtelManager {
pub fn with_model(mut self, model: &str, slug: &str) -> Self {
self.metadata.model = model.to_owned();
self.metadata.slug = slug.to_owned();
self
}
pub fn with_metrics(mut self, metrics: MetricsClient) -> Self {
self.metrics = Some(metrics);
self.metrics_use_metadata_tags = true;
self
}
pub fn with_metrics_without_metadata_tags(mut self, metrics: MetricsClient) -> Self {
self.metrics = Some(metrics);
self.metrics_use_metadata_tags = false;
self
}
pub fn with_metrics_config(self, config: MetricsConfig) -> MetricsResult<Self> {
let metrics = MetricsClient::new(config)?;
Ok(self.with_metrics(metrics))
}
pub fn with_provider_metrics(self, provider: &OtelProvider) -> Self {
match provider.metrics() {
Some(metrics) => self.with_metrics(metrics.clone()),
None => self,
}
}
pub fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.counter(name, inc, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics counter [{name}] failed: {e}");
}
}
pub fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.histogram(name, value, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics histogram [{name}] failed: {e}");
}
}
pub fn record_duration(&self, name: &str, duration: Duration, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.record_duration(name, duration, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics duration [{name}] failed: {e}");
}
}
pub fn start_timer(&self, name: &str, tags: &[(&str, &str)]) -> Result<Timer, MetricsError> {
let Some(metrics) = &self.metrics else {
return Err(MetricsError::ExporterDisabled);
};
let tags = self.tags_with_metadata(tags)?;
metrics.start_timer(name, &tags)
}
pub fn shutdown_metrics(&self) -> MetricsResult<()> {
let Some(metrics) = &self.metrics else {
return Ok(());
};
metrics.shutdown()
}
pub fn snapshot_metrics(&self) -> MetricsResult<ResourceMetrics> {
let Some(metrics) = &self.metrics else {
return Err(MetricsError::ExporterDisabled);
};
metrics.snapshot()
}
/// Collect and discard a runtime metrics snapshot to reset delta accumulators.
pub fn reset_runtime_metrics(&self) {
if self.metrics.is_none() {
return;
}
if let Err(err) = self.snapshot_metrics() {
debug!("runtime metrics reset skipped: {err}");
}
}
/// Collect a runtime metrics summary if debug snapshots are available.
pub fn runtime_metrics_summary(&self) -> Option<RuntimeMetricsSummary> {
let snapshot = match self.snapshot_metrics() {
Ok(snapshot) => snapshot,
Err(_) => {
return None;
}
};
let summary = RuntimeMetricsSummary::from_snapshot(&snapshot);
if summary.is_empty() {
None
} else {
Some(summary)
}
}
fn tags_with_metadata<'a>(
&'a self,
tags: &'a [(&'a str, &'a str)],
) -> MetricsResult<Vec<(&'a str, &'a str)>> {
let mut merged = self.metadata_tag_refs()?;
merged.extend(tags.iter().copied());
Ok(merged)
}
fn metadata_tag_refs(&self) -> MetricsResult<Vec<(&str, &str)>> {
if !self.metrics_use_metadata_tags {
return Ok(Vec::new());
}
let mut tags = Vec::with_capacity(6);
Self::push_metadata_tag(&mut tags, "auth_mode", self.metadata.auth_mode.as_deref())?;
Self::push_metadata_tag(
&mut tags,
"session_source",
Some(self.metadata.session_source.as_str()),
)?;
Self::push_metadata_tag(
&mut tags,
"originator",
Some(self.metadata.originator.as_str()),
)?;
Self::push_metadata_tag(&mut tags, "model", Some(self.metadata.model.as_str()))?;
Self::push_metadata_tag(&mut tags, "app.version", Some(self.metadata.app_version))?;
Ok(tags)
}
fn push_metadata_tag<'a>(
tags: &mut Vec<(&'a str, &'a str)>,
key: &'static str,
value: Option<&'a str>,
) -> MetricsResult<()> {
let Some(value) = value else {
return Ok(());
};
validate_tag_key(key)?;
validate_tag_value(value)?;
tags.push((key, value));
Ok(())
}
}
/// Start a metrics timer using the globally installed metrics client.
pub fn start_global_timer(name: &str, tags: &[(&str, &str)]) -> MetricsResult<Timer> {
let Some(metrics) = crate::metrics::global() else {
return Err(MetricsError::ExporterDisabled);
};
metrics.start_timer(name, tags)
}