diff --git a/codex-rs/otel/README.md b/codex-rs/otel/README.md index 0194eea7f..02d4fc3a0 100644 --- a/codex-rs/otel/README.md +++ b/codex-rs/otel/README.md @@ -2,10 +2,11 @@ `codex-otel` is the OpenTelemetry integration crate for Codex. It provides: -- Trace/log/metrics exporters and tracing subscriber layers (`codex_otel::otel_provider`). -- A structured event helper (`codex_otel::OtelManager`). -- OpenTelemetry metrics support via OTLP exporters (`codex_otel::metrics`). -- A metrics facade on `OtelManager` so tracing + metrics share metadata. +- Provider wiring for log/trace/metric exporters (`codex_otel::OtelProvider`, + `codex_otel::provider`, and the compatibility shim `codex_otel::otel_provider`). +- Session-scoped business event emission via `codex_otel::OtelManager`. +- Low-level metrics APIs via `codex_otel::metrics`. +- Trace-context helpers via `codex_otel::trace_context` and crate-root re-exports. ## Tracing and logs @@ -17,7 +18,7 @@ registry: use codex_otel::config::OtelExporter; use codex_otel::config::OtelHttpProtocol; use codex_otel::config::OtelSettings; -use codex_otel::otel_provider::OtelProvider; +use codex_otel::OtelProvider; use tracing_subscriber::prelude::*; let settings = OtelSettings { @@ -51,7 +52,8 @@ if let Some(provider) = OtelProvider::from(&settings)? { ## OtelManager (events) `OtelManager` adds consistent metadata to tracing events and helps record -Codex-specific events. +Codex-specific session events. Rich session/business events should go through +`OtelManager`; subsystem-owned audit events can stay with the owning subsystem. ```rust use codex_otel::OtelManager; @@ -63,6 +65,7 @@ let manager = OtelManager::new( account_id, account_email, auth_mode, + originator, log_user_prompts, terminal_type, session_source, @@ -119,6 +122,15 @@ metrics.counter("codex.turns", 1, &[("model", "gpt-5.1")])?; metrics.shutdown()?; // flushes in-memory exporter ``` +## Trace context + +Trace propagation helpers remain separate from the session event emitter: + +```rust +use codex_otel::current_span_w3c_trace_context; +use codex_otel::set_parent_from_w3c_trace_context; +``` + ## Shutdown - `OtelProvider::shutdown()` stops the OTEL exporter. diff --git a/codex-rs/otel/src/events/mod.rs b/codex-rs/otel/src/events/mod.rs new file mode 100644 index 000000000..cd2c922e5 --- /dev/null +++ b/codex-rs/otel/src/events/mod.rs @@ -0,0 +1,2 @@ +pub(crate) mod otel_manager; +pub(crate) mod shared; diff --git a/codex-rs/otel/src/traces/otel_manager.rs b/codex-rs/otel/src/events/otel_manager.rs similarity index 79% rename from codex-rs/otel/src/traces/otel_manager.rs rename to codex-rs/otel/src/events/otel_manager.rs index ab0cc6ff0..bfe477f56 100644 --- a/codex-rs/otel/src/traces/otel_manager.rs +++ b/codex-rs/otel/src/events/otel_manager.rs @@ -1,6 +1,12 @@ -use crate::OTEL_LOG_ONLY_TARGET; -use crate::OTEL_TRACE_SAFE_TARGET; use crate::TelemetryAuthMode; +use crate::ToolDecisionSource; +use crate::events::shared::log_and_trace_event; +use crate::events::shared::log_event; +use crate::events::shared::trace_event; +use crate::metrics::MetricsClient; +use crate::metrics::MetricsConfig; +use crate::metrics::MetricsError; +use crate::metrics::Result as MetricsResult; use crate::metrics::names::API_CALL_COUNT_METRIC; use crate::metrics::names::API_CALL_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC; @@ -17,9 +23,12 @@ use crate::metrics::names::WEBSOCKET_EVENT_COUNT_METRIC; use crate::metrics::names::WEBSOCKET_EVENT_DURATION_METRIC; use crate::metrics::names::WEBSOCKET_REQUEST_COUNT_METRIC; use crate::metrics::names::WEBSOCKET_REQUEST_DURATION_METRIC; +use crate::metrics::runtime_metrics::RuntimeMetricsSummary; +use crate::metrics::timer::Timer; +use crate::metrics::validation::validate_tag_key; +use crate::metrics::validation::validate_tag_value; +use crate::provider::OtelProvider; use crate::sanitize_metric_tag_value; -use chrono::SecondsFormat; -use chrono::Utc; use codex_api::ApiError; use codex_api::ResponseEvent; use codex_protocol::ThreadId; @@ -33,20 +42,16 @@ use codex_protocol::protocol::SessionSource; use codex_protocol::user_input::UserInput; use eventsource_stream::Event as StreamEvent; use eventsource_stream::EventStreamError as StreamError; +use opentelemetry_sdk::metrics::data::ResourceMetrics; use reqwest::Error; use reqwest::Response; use std::borrow::Cow; -use std::fmt::Display; use std::future::Future; use std::time::Duration; use std::time::Instant; use tokio::time::error::Elapsed; use tracing::Span; -pub use crate::OtelEventMetadata; -pub use crate::OtelManager; -pub use crate::ToolDecisionSource; - const SSE_UNKNOWN_KIND: &str = "unknown"; const WEBSOCKET_UNKNOWN_KIND: &str = "unknown"; const RESPONSES_WEBSOCKET_TIMING_KIND: &str = "responsesapi.websocket_timing"; @@ -58,85 +63,207 @@ const RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD: &str = "engine_service_ttft_total const RESPONSES_API_ENGINE_IAPI_TBT_FIELD: &str = "engine_iapi_tbt_across_engine_calls_ms"; const RESPONSES_API_ENGINE_SERVICE_TBT_FIELD: &str = "engine_service_tbt_across_engine_calls_ms"; -macro_rules! log_event { - ($self:expr, $($fields:tt)*) => {{ - tracing::event!( - target: OTEL_LOG_ONLY_TARGET, - tracing::Level::INFO, - $($fields)* - event.timestamp = %timestamp(), - conversation.id = %$self.metadata.conversation_id, - app.version = %$self.metadata.app_version, - auth_mode = $self.metadata.auth_mode, - originator = %$self.metadata.originator, - user.account_id = $self.metadata.account_id, - user.email = $self.metadata.account_email, - terminal.type = %$self.metadata.terminal_type, - model = %$self.metadata.model, - slug = %$self.metadata.slug, - ); - }}; +#[derive(Debug, Clone)] +pub struct OtelEventMetadata { + pub(crate) conversation_id: ThreadId, + pub(crate) auth_mode: Option, + pub(crate) account_id: Option, + pub(crate) account_email: Option, + pub(crate) originator: String, + pub(crate) service_name: Option, + pub(crate) session_source: String, + pub(crate) model: String, + pub(crate) slug: String, + pub(crate) log_user_prompts: bool, + pub(crate) app_version: &'static str, + pub(crate) terminal_type: String, } -macro_rules! trace_event { - ($self:expr, $($fields:tt)*) => {{ - tracing::event!( - target: OTEL_TRACE_SAFE_TARGET, - tracing::Level::INFO, - $($fields)* - event.timestamp = %timestamp(), - conversation.id = %$self.metadata.conversation_id, - app.version = %$self.metadata.app_version, - auth_mode = $self.metadata.auth_mode, - originator = %$self.metadata.originator, - terminal.type = %$self.metadata.terminal_type, - model = %$self.metadata.model, - slug = %$self.metadata.slug, - ); - }}; -} - -macro_rules! log_and_trace_event { - ( - $self:expr, - common: { $($common:tt)* }, - log: { $($log:tt)* }, - trace: { $($trace:tt)* }, - ) => {{ - tracing::event!( - target: OTEL_LOG_ONLY_TARGET, - tracing::Level::INFO, - $($common)* - $($log)* - event.timestamp = %timestamp(), - conversation.id = %$self.metadata.conversation_id, - app.version = %$self.metadata.app_version, - auth_mode = $self.metadata.auth_mode, - originator = %$self.metadata.originator, - user.account_id = $self.metadata.account_id, - user.email = $self.metadata.account_email, - terminal.type = %$self.metadata.terminal_type, - model = %$self.metadata.model, - slug = %$self.metadata.slug, - ); - tracing::event!( - target: OTEL_TRACE_SAFE_TARGET, - tracing::Level::INFO, - $($common)* - $($trace)* - event.timestamp = %timestamp(), - conversation.id = %$self.metadata.conversation_id, - app.version = %$self.metadata.app_version, - auth_mode = $self.metadata.auth_mode, - originator = %$self.metadata.originator, - terminal.type = %$self.metadata.terminal_type, - model = %$self.metadata.model, - slug = %$self.metadata.slug, - ); - }}; +#[derive(Debug, Clone)] +pub struct OtelManager { + pub(crate) metadata: OtelEventMetadata, + pub(crate) metrics: Option, + pub(crate) metrics_use_metadata_tags: bool, } impl OtelManager { + pub fn with_model(mut self, model: &str, slug: &str) -> Self { + self.metadata.model = model.to_owned(); + self.metadata.slug = slug.to_owned(); + self + } + + pub fn with_metrics_service_name(mut self, service_name: &str) -> Self { + self.metadata.service_name = Some(sanitize_metric_tag_value(service_name)); + self + } + + pub fn with_metrics(mut self, metrics: MetricsClient) -> Self { + self.metrics = Some(metrics); + self.metrics_use_metadata_tags = true; + self + } + + pub fn with_metrics_without_metadata_tags(mut self, metrics: MetricsClient) -> Self { + self.metrics = Some(metrics); + self.metrics_use_metadata_tags = false; + self + } + + pub fn with_metrics_config(self, config: MetricsConfig) -> MetricsResult { + let metrics = MetricsClient::new(config)?; + Ok(self.with_metrics(metrics)) + } + + pub fn with_provider_metrics(self, provider: &OtelProvider) -> Self { + match provider.metrics() { + Some(metrics) => self.with_metrics(metrics.clone()), + None => self, + } + } + + pub fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) { + let res: MetricsResult<()> = (|| { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + + let tags = self.tags_with_metadata(tags)?; + metrics.counter(name, inc, &tags) + })(); + + if let Err(e) = res { + tracing::warn!("metrics counter [{name}] failed: {e}"); + } + } + + pub fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) { + let res: MetricsResult<()> = (|| { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + + let tags = self.tags_with_metadata(tags)?; + metrics.histogram(name, value, &tags) + })(); + + if let Err(e) = res { + tracing::warn!("metrics histogram [{name}] failed: {e}"); + } + } + + pub fn record_duration(&self, name: &str, duration: Duration, tags: &[(&str, &str)]) { + let res: MetricsResult<()> = (|| { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + + let tags = self.tags_with_metadata(tags)?; + metrics.record_duration(name, duration, &tags) + })(); + + if let Err(e) = res { + tracing::warn!("metrics duration [{name}] failed: {e}"); + } + } + + pub fn start_timer(&self, name: &str, tags: &[(&str, &str)]) -> Result { + let Some(metrics) = &self.metrics else { + return Err(MetricsError::ExporterDisabled); + }; + let tags = self.tags_with_metadata(tags)?; + metrics.start_timer(name, &tags) + } + + pub fn shutdown_metrics(&self) -> MetricsResult<()> { + let Some(metrics) = &self.metrics else { + return Ok(()); + }; + metrics.shutdown() + } + + pub fn snapshot_metrics(&self) -> MetricsResult { + let Some(metrics) = &self.metrics else { + return Err(MetricsError::ExporterDisabled); + }; + metrics.snapshot() + } + + /// Collect and discard a runtime metrics snapshot to reset delta accumulators. + pub fn reset_runtime_metrics(&self) { + if self.metrics.is_none() { + return; + } + if let Err(err) = self.snapshot_metrics() { + tracing::debug!("runtime metrics reset skipped: {err}"); + } + } + + /// Collect a runtime metrics summary if debug snapshots are available. + pub fn runtime_metrics_summary(&self) -> Option { + let snapshot = match self.snapshot_metrics() { + Ok(snapshot) => snapshot, + Err(_) => { + return None; + } + }; + let summary = RuntimeMetricsSummary::from_snapshot(&snapshot); + if summary.is_empty() { + None + } else { + Some(summary) + } + } + + fn tags_with_metadata<'a>( + &'a self, + tags: &'a [(&'a str, &'a str)], + ) -> MetricsResult> { + let mut merged = self.metadata_tag_refs()?; + merged.extend(tags.iter().copied()); + Ok(merged) + } + + fn metadata_tag_refs(&self) -> MetricsResult> { + if !self.metrics_use_metadata_tags { + return Ok(Vec::new()); + } + let mut tags = Vec::with_capacity(7); + Self::push_metadata_tag(&mut tags, "auth_mode", self.metadata.auth_mode.as_deref())?; + Self::push_metadata_tag( + &mut tags, + "session_source", + Some(self.metadata.session_source.as_str()), + )?; + Self::push_metadata_tag( + &mut tags, + "originator", + Some(self.metadata.originator.as_str()), + )?; + Self::push_metadata_tag( + &mut tags, + "service_name", + self.metadata.service_name.as_deref(), + )?; + Self::push_metadata_tag(&mut tags, "model", Some(self.metadata.model.as_str()))?; + Self::push_metadata_tag(&mut tags, "app.version", Some(self.metadata.app_version))?; + Ok(tags) + } + + fn push_metadata_tag<'a>( + tags: &mut Vec<(&'a str, &'a str)>, + key: &'static str, + value: Option<&'a str>, + ) -> MetricsResult<()> { + let Some(value) = value else { + return Ok(()); + }; + validate_tag_key(key)?; + validate_tag_value(value)?; + tags.push((key, value)); + Ok(()) + } + #[allow(clippy::too_many_arguments)] pub fn new( conversation_id: ThreadId, @@ -176,13 +303,13 @@ impl OtelManager { match event { ResponseEvent::OutputItemDone(item) => { handle_responses_span.record("from", "output_item_done"); - if let ResponseItem::FunctionCall { name, .. } = &item { + if let ResponseItem::FunctionCall { name, .. } = item { handle_responses_span.record("tool_name", name.as_str()); } } ResponseEvent::OutputItemAdded(item) => { handle_responses_span.record("from", "output_item_added"); - if let ResponseItem::FunctionCall { name, .. } = &item { + if let ResponseItem::FunctionCall { name, .. } = item { handle_responses_span.record("tool_name", name.as_str()); } } @@ -406,7 +533,7 @@ impl OtelManager { response: &Result>>, Elapsed>, duration: Duration, ) where - E: Display, + E: std::fmt::Display, { match response { Ok(Some(Ok(sse))) => { @@ -469,7 +596,7 @@ impl OtelManager { pub fn sse_event_failed(&self, kind: Option<&String>, duration: Duration, error: &T) where - T: Display, + T: std::fmt::Display, { let kind_str = kind.map_or(SSE_UNKNOWN_KIND, String::as_str); self.counter( @@ -508,7 +635,7 @@ impl OtelManager { pub fn see_event_completed_failed(&self, error: &T) where - T: Display, + T: std::fmt::Display, { log_and_trace_event!( self, @@ -620,7 +747,7 @@ impl OtelManager { where F: FnOnce() -> Fut, Fut: Future>, - E: Display, + E: std::fmt::Display, { let start = Instant::now(); let result = f().await; @@ -774,8 +901,9 @@ impl OtelManager { fn responses_type(event: &ResponseEvent) -> String { match event { ResponseEvent::Created => "created".into(), - ResponseEvent::OutputItemDone(item) => OtelManager::responses_item_type(item), - ResponseEvent::OutputItemAdded(item) => OtelManager::responses_item_type(item), + ResponseEvent::OutputItemDone(item) | ResponseEvent::OutputItemAdded(item) => { + OtelManager::responses_item_type(item) + } ResponseEvent::Completed { .. } => "completed".into(), ResponseEvent::OutputTextDelta(_) => "text_delta".into(), ResponseEvent::ReasoningSummaryDelta { .. } => "reasoning_summary_delta".into(), @@ -808,10 +936,6 @@ impl OtelManager { } } -fn timestamp() -> String { - Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true) -} - fn duration_from_ms_value(value: Option<&serde_json::Value>) -> Option { let value = value?; let ms = value diff --git a/codex-rs/otel/src/events/shared.rs b/codex-rs/otel/src/events/shared.rs new file mode 100644 index 000000000..08e369316 --- /dev/null +++ b/codex-rs/otel/src/events/shared.rs @@ -0,0 +1,60 @@ +use chrono::SecondsFormat; +use chrono::Utc; + +macro_rules! log_event { + ($self:expr, $($fields:tt)*) => {{ + tracing::event!( + target: $crate::targets::OTEL_LOG_ONLY_TARGET, + tracing::Level::INFO, + $($fields)* + event.timestamp = %$crate::events::shared::timestamp(), + conversation.id = %$self.metadata.conversation_id, + app.version = %$self.metadata.app_version, + auth_mode = $self.metadata.auth_mode, + originator = %$self.metadata.originator, + user.account_id = $self.metadata.account_id, + user.email = $self.metadata.account_email, + terminal.type = %$self.metadata.terminal_type, + model = %$self.metadata.model, + slug = %$self.metadata.slug, + ); + }}; +} + +macro_rules! trace_event { + ($self:expr, $($fields:tt)*) => {{ + tracing::event!( + target: $crate::targets::OTEL_TRACE_SAFE_TARGET, + tracing::Level::INFO, + $($fields)* + event.timestamp = %$crate::events::shared::timestamp(), + conversation.id = %$self.metadata.conversation_id, + app.version = %$self.metadata.app_version, + auth_mode = $self.metadata.auth_mode, + originator = %$self.metadata.originator, + terminal.type = %$self.metadata.terminal_type, + model = %$self.metadata.model, + slug = %$self.metadata.slug, + ); + }}; +} + +macro_rules! log_and_trace_event { + ( + $self:expr, + common: { $($common:tt)* }, + log: { $($log:tt)* }, + trace: { $($trace:tt)* }, + ) => {{ + log_event!($self, $($common)* $($log)*); + trace_event!($self, $($common)* $($trace)*); + }}; +} + +pub(crate) use log_and_trace_event; +pub(crate) use log_event; +pub(crate) use trace_event; + +pub(crate) fn timestamp() -> String { + Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true) +} diff --git a/codex-rs/otel/src/lib.rs b/codex-rs/otel/src/lib.rs index 4f36966b9..5594b26a7 100644 --- a/codex-rs/otel/src/lib.rs +++ b/codex-rs/otel/src/lib.rs @@ -1,39 +1,31 @@ pub mod config; +mod events; pub mod metrics; pub mod otel_provider; +pub mod provider; pub mod trace_context; -pub mod traces; mod otlp; +mod targets; -use crate::metrics::MetricsClient; -use crate::metrics::MetricsConfig; use crate::metrics::MetricsError; use crate::metrics::Result as MetricsResult; -pub use crate::metrics::timer::Timer; -use crate::metrics::validation::validate_tag_key; -use crate::metrics::validation::validate_tag_value; -use crate::otel_provider::OtelProvider; -use codex_protocol::ThreadId; -pub use codex_utils_string::sanitize_metric_tag_value; -use opentelemetry_sdk::metrics::data::ResourceMetrics; use serde::Serialize; -use std::time::Duration; use strum_macros::Display; -use tracing::debug; +pub use crate::events::otel_manager::OtelEventMetadata; +pub use crate::events::otel_manager::OtelManager; pub use crate::metrics::runtime_metrics::RuntimeMetricTotals; pub use crate::metrics::runtime_metrics::RuntimeMetricsSummary; -pub use crate::otel_provider::traceparent_context_from_env; +pub use crate::metrics::timer::Timer; +pub use crate::provider::OtelProvider; pub use crate::trace_context::context_from_w3c_trace_context; pub use crate::trace_context::current_span_trace_id; pub use crate::trace_context::current_span_w3c_trace_context; pub use crate::trace_context::set_parent_from_context; pub use crate::trace_context::set_parent_from_w3c_trace_context; - -pub(crate) const OTEL_TARGET_PREFIX: &str = "codex_otel"; -pub(crate) const OTEL_LOG_ONLY_TARGET: &str = "codex_otel.log_only"; -pub(crate) const OTEL_TRACE_SAFE_TARGET: &str = "codex_otel.trace_safe"; +pub use crate::trace_context::traceparent_context_from_env; +pub use codex_utils_string::sanitize_metric_tag_value; #[derive(Debug, Clone, Serialize, Display)] #[serde(rename_all = "snake_case")] @@ -49,208 +41,6 @@ pub enum TelemetryAuthMode { Chatgpt, } -#[derive(Debug, Clone)] -pub struct OtelEventMetadata { - pub(crate) conversation_id: ThreadId, - pub(crate) auth_mode: Option, - pub(crate) account_id: Option, - pub(crate) account_email: Option, - pub(crate) originator: String, - pub(crate) service_name: Option, - pub(crate) session_source: String, - pub(crate) model: String, - pub(crate) slug: String, - pub(crate) log_user_prompts: bool, - pub(crate) app_version: &'static str, - pub(crate) terminal_type: String, -} - -#[derive(Debug, Clone)] -pub struct OtelManager { - pub(crate) metadata: OtelEventMetadata, - pub(crate) metrics: Option, - pub(crate) metrics_use_metadata_tags: bool, -} - -impl OtelManager { - pub fn with_model(mut self, model: &str, slug: &str) -> Self { - self.metadata.model = model.to_owned(); - self.metadata.slug = slug.to_owned(); - self - } - - pub fn with_metrics_service_name(mut self, service_name: &str) -> Self { - self.metadata.service_name = Some(sanitize_metric_tag_value(service_name)); - self - } - - pub fn with_metrics(mut self, metrics: MetricsClient) -> Self { - self.metrics = Some(metrics); - self.metrics_use_metadata_tags = true; - self - } - - pub fn with_metrics_without_metadata_tags(mut self, metrics: MetricsClient) -> Self { - self.metrics = Some(metrics); - self.metrics_use_metadata_tags = false; - self - } - - pub fn with_metrics_config(self, config: MetricsConfig) -> MetricsResult { - let metrics = MetricsClient::new(config)?; - Ok(self.with_metrics(metrics)) - } - - pub fn with_provider_metrics(self, provider: &OtelProvider) -> Self { - match provider.metrics() { - Some(metrics) => self.with_metrics(metrics.clone()), - None => self, - } - } - - pub fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) { - let res: MetricsResult<()> = (|| { - let Some(metrics) = &self.metrics else { - return Ok(()); - }; - - let tags = self.tags_with_metadata(tags)?; - metrics.counter(name, inc, &tags) - })(); - - if let Err(e) = res { - tracing::warn!("metrics counter [{name}] failed: {e}"); - } - } - - pub fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) { - let res: MetricsResult<()> = (|| { - let Some(metrics) = &self.metrics else { - return Ok(()); - }; - - let tags = self.tags_with_metadata(tags)?; - metrics.histogram(name, value, &tags) - })(); - - if let Err(e) = res { - tracing::warn!("metrics histogram [{name}] failed: {e}"); - } - } - - pub fn record_duration(&self, name: &str, duration: Duration, tags: &[(&str, &str)]) { - let res: MetricsResult<()> = (|| { - let Some(metrics) = &self.metrics else { - return Ok(()); - }; - - let tags = self.tags_with_metadata(tags)?; - metrics.record_duration(name, duration, &tags) - })(); - - if let Err(e) = res { - tracing::warn!("metrics duration [{name}] failed: {e}"); - } - } - - pub fn start_timer(&self, name: &str, tags: &[(&str, &str)]) -> Result { - let Some(metrics) = &self.metrics else { - return Err(MetricsError::ExporterDisabled); - }; - let tags = self.tags_with_metadata(tags)?; - metrics.start_timer(name, &tags) - } - - pub fn shutdown_metrics(&self) -> MetricsResult<()> { - let Some(metrics) = &self.metrics else { - return Ok(()); - }; - metrics.shutdown() - } - - pub fn snapshot_metrics(&self) -> MetricsResult { - let Some(metrics) = &self.metrics else { - return Err(MetricsError::ExporterDisabled); - }; - metrics.snapshot() - } - - /// Collect and discard a runtime metrics snapshot to reset delta accumulators. - pub fn reset_runtime_metrics(&self) { - if self.metrics.is_none() { - return; - } - if let Err(err) = self.snapshot_metrics() { - debug!("runtime metrics reset skipped: {err}"); - } - } - - /// Collect a runtime metrics summary if debug snapshots are available. - pub fn runtime_metrics_summary(&self) -> Option { - let snapshot = match self.snapshot_metrics() { - Ok(snapshot) => snapshot, - Err(_) => { - return None; - } - }; - let summary = RuntimeMetricsSummary::from_snapshot(&snapshot); - if summary.is_empty() { - None - } else { - Some(summary) - } - } - - fn tags_with_metadata<'a>( - &'a self, - tags: &'a [(&'a str, &'a str)], - ) -> MetricsResult> { - let mut merged = self.metadata_tag_refs()?; - merged.extend(tags.iter().copied()); - Ok(merged) - } - - fn metadata_tag_refs(&self) -> MetricsResult> { - if !self.metrics_use_metadata_tags { - return Ok(Vec::new()); - } - let mut tags = Vec::with_capacity(7); - Self::push_metadata_tag(&mut tags, "auth_mode", self.metadata.auth_mode.as_deref())?; - Self::push_metadata_tag( - &mut tags, - "session_source", - Some(self.metadata.session_source.as_str()), - )?; - Self::push_metadata_tag( - &mut tags, - "originator", - Some(self.metadata.originator.as_str()), - )?; - Self::push_metadata_tag( - &mut tags, - "service_name", - self.metadata.service_name.as_deref(), - )?; - Self::push_metadata_tag(&mut tags, "model", Some(self.metadata.model.as_str()))?; - Self::push_metadata_tag(&mut tags, "app.version", Some(self.metadata.app_version))?; - Ok(tags) - } - - fn push_metadata_tag<'a>( - tags: &mut Vec<(&'a str, &'a str)>, - key: &'static str, - value: Option<&'a str>, - ) -> MetricsResult<()> { - let Some(value) = value else { - return Ok(()); - }; - validate_tag_key(key)?; - validate_tag_value(value)?; - tags.push((key, value)); - Ok(()) - } -} - /// Start a metrics timer using the globally installed metrics client. pub fn start_global_timer(name: &str, tags: &[(&str, &str)]) -> MetricsResult { let Some(metrics) = crate::metrics::global() else { diff --git a/codex-rs/otel/src/otel_provider.rs b/codex-rs/otel/src/otel_provider.rs index 8145813d8..97db9ee8d 100644 --- a/codex-rs/otel/src/otel_provider.rs +++ b/codex-rs/otel/src/otel_provider.rs @@ -1,493 +1,4 @@ -use crate::OTEL_TARGET_PREFIX; -use crate::OTEL_TRACE_SAFE_TARGET; -use crate::config::OtelExporter; -use crate::config::OtelHttpProtocol; -use crate::config::OtelSettings; -use crate::metrics::MetricsClient; -use crate::metrics::MetricsConfig; -use crate::trace_context::context_from_trace_headers; -use gethostname::gethostname; -use opentelemetry::Context; -use opentelemetry::KeyValue; -use opentelemetry::global; -use opentelemetry::trace::TracerProvider as _; -use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; -use opentelemetry_otlp::LogExporter; -use opentelemetry_otlp::OTEL_EXPORTER_OTLP_LOGS_TIMEOUT; -use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TRACES_TIMEOUT; -use opentelemetry_otlp::Protocol; -use opentelemetry_otlp::SpanExporter; -use opentelemetry_otlp::WithExportConfig; -use opentelemetry_otlp::WithHttpConfig; -use opentelemetry_otlp::WithTonicConfig; -use opentelemetry_otlp::tonic_types::metadata::MetadataMap; -use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig; -use opentelemetry_sdk::Resource; -use opentelemetry_sdk::logs::SdkLoggerProvider; -use opentelemetry_sdk::propagation::TraceContextPropagator; -use opentelemetry_sdk::trace::BatchSpanProcessor; -use opentelemetry_sdk::trace::SdkTracerProvider; -use opentelemetry_sdk::trace::Tracer; -use opentelemetry_semantic_conventions as semconv; -use std::env; -use std::error::Error; -use std::sync::OnceLock; -use tracing::debug; -use tracing::warn; -use tracing_subscriber::Layer; -use tracing_subscriber::registry::LookupSpan; +//! Compatibility shim for `codex_otel::otel_provider`. -const ENV_ATTRIBUTE: &str = "env"; -const HOST_NAME_ATTRIBUTE: &str = "host.name"; -const TRACEPARENT_ENV_VAR: &str = "TRACEPARENT"; -const TRACESTATE_ENV_VAR: &str = "TRACESTATE"; -static TRACEPARENT_CONTEXT: OnceLock> = OnceLock::new(); - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -enum ResourceKind { - Logs, - Traces, -} - -pub struct OtelProvider { - pub logger: Option, - pub tracer_provider: Option, - pub tracer: Option, - pub metrics: Option, -} - -impl OtelProvider { - pub fn shutdown(&self) { - if let Some(logger) = &self.logger { - let _ = logger.shutdown(); - } - if let Some(tracer_provider) = &self.tracer_provider { - let _ = tracer_provider.shutdown(); - } - if let Some(metrics) = &self.metrics { - let _ = metrics.shutdown(); - } - } - - pub fn from(settings: &OtelSettings) -> Result, Box> { - let log_enabled = !matches!(settings.exporter, OtelExporter::None); - let trace_enabled = !matches!(settings.trace_exporter, OtelExporter::None); - - let metric_exporter = crate::config::resolve_exporter(&settings.metrics_exporter); - let metrics = if matches!(metric_exporter, OtelExporter::None) { - None - } else { - let mut config = MetricsConfig::otlp( - settings.environment.clone(), - settings.service_name.clone(), - settings.service_version.clone(), - metric_exporter, - ); - if settings.runtime_metrics { - config = config.with_runtime_reader(); - } - Some(MetricsClient::new(config)?) - }; - - if let Some(metrics) = metrics.as_ref() { - crate::metrics::install_global(metrics.clone()); - } - - if !log_enabled && !trace_enabled && metrics.is_none() { - debug!("No OTEL exporter enabled in settings."); - return Ok(None); - } - - let log_resource = make_resource(settings, ResourceKind::Logs); - let trace_resource = make_resource(settings, ResourceKind::Traces); - let logger = log_enabled - .then(|| build_logger(&log_resource, &settings.exporter)) - .transpose()?; - - let tracer_provider = trace_enabled - .then(|| build_tracer_provider(&trace_resource, &settings.trace_exporter)) - .transpose()?; - - let tracer = tracer_provider - .as_ref() - .map(|provider| provider.tracer(settings.service_name.clone())); - - if let Some(provider) = tracer_provider.clone() { - global::set_tracer_provider(provider); - global::set_text_map_propagator(TraceContextPropagator::new()); - } - Ok(Some(Self { - logger, - tracer_provider, - tracer, - metrics, - })) - } - - pub fn logger_layer(&self) -> Option + Send + Sync> - where - S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync, - { - self.logger.as_ref().map(|logger| { - OpenTelemetryTracingBridge::new(logger).with_filter( - tracing_subscriber::filter::filter_fn(OtelProvider::log_export_filter), - ) - }) - } - - pub fn tracing_layer(&self) -> Option + Send + Sync> - where - S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync, - { - self.tracer.as_ref().map(|tracer| { - tracing_opentelemetry::layer() - .with_tracer(tracer.clone()) - .with_filter(tracing_subscriber::filter::filter_fn( - OtelProvider::trace_export_filter, - )) - }) - } - - pub fn codex_export_filter(meta: &tracing::Metadata<'_>) -> bool { - Self::log_export_filter(meta) - } - - pub fn log_export_filter(meta: &tracing::Metadata<'_>) -> bool { - is_log_export_target(meta.target()) - } - - pub fn trace_export_filter(meta: &tracing::Metadata<'_>) -> bool { - meta.is_span() || is_trace_safe_target(meta.target()) - } - - pub fn metrics(&self) -> Option<&MetricsClient> { - self.metrics.as_ref() - } -} - -impl Drop for OtelProvider { - fn drop(&mut self) { - if let Some(logger) = &self.logger { - let _ = logger.shutdown(); - } - if let Some(tracer_provider) = &self.tracer_provider { - let _ = tracer_provider.shutdown(); - } - if let Some(metrics) = &self.metrics { - let _ = metrics.shutdown(); - } - } -} - -pub fn traceparent_context_from_env() -> Option { - TRACEPARENT_CONTEXT - .get_or_init(load_traceparent_context) - .clone() -} - -fn load_traceparent_context() -> Option { - let traceparent = env::var(TRACEPARENT_ENV_VAR).ok()?; - let tracestate = env::var(TRACESTATE_ENV_VAR).ok(); - - match context_from_trace_headers(Some(&traceparent), tracestate.as_deref()) { - Some(context) => { - debug!("TRACEPARENT detected; continuing trace from parent context"); - Some(context) - } - None => { - warn!("TRACEPARENT is set but invalid; ignoring trace context"); - None - } - } -} - -fn make_resource(settings: &OtelSettings, kind: ResourceKind) -> Resource { - Resource::builder() - .with_service_name(settings.service_name.clone()) - .with_attributes(resource_attributes( - settings, - detected_host_name().as_deref(), - kind, - )) - .build() -} - -fn resource_attributes( - settings: &OtelSettings, - host_name: Option<&str>, - kind: ResourceKind, -) -> Vec { - let mut attributes = vec![ - KeyValue::new( - semconv::attribute::SERVICE_VERSION, - settings.service_version.clone(), - ), - KeyValue::new(ENV_ATTRIBUTE, settings.environment.clone()), - ]; - if kind == ResourceKind::Logs - && let Some(host_name) = host_name.and_then(normalize_host_name) - { - attributes.push(KeyValue::new(HOST_NAME_ATTRIBUTE, host_name)); - } - attributes -} - -fn detected_host_name() -> Option { - let host_name = gethostname(); - normalize_host_name(host_name.to_string_lossy().as_ref()) -} - -fn normalize_host_name(host_name: &str) -> Option { - let host_name = host_name.trim(); - (!host_name.is_empty()).then(|| host_name.to_owned()) -} - -fn is_log_export_target(target: &str) -> bool { - target.starts_with(OTEL_TARGET_PREFIX) && !is_trace_safe_target(target) -} - -fn is_trace_safe_target(target: &str) -> bool { - target.starts_with(OTEL_TRACE_SAFE_TARGET) -} - -fn build_logger( - resource: &Resource, - exporter: &OtelExporter, -) -> Result> { - let mut builder = SdkLoggerProvider::builder().with_resource(resource.clone()); - - match crate::config::resolve_exporter(exporter) { - OtelExporter::None => return Ok(builder.build()), - OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), - OtelExporter::OtlpGrpc { - endpoint, - headers, - tls, - } => { - debug!("Using OTLP Grpc exporter: {endpoint}"); - - let header_map = crate::otlp::build_header_map(&headers); - - let base_tls_config = ClientTlsConfig::new() - .with_enabled_roots() - .assume_http2(true); - - let tls_config = match tls.as_ref() { - Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?, - None => base_tls_config, - }; - - let exporter = LogExporter::builder() - .with_tonic() - .with_endpoint(endpoint) - .with_metadata(MetadataMap::from_headers(header_map)) - .with_tls_config(tls_config) - .build()?; - - builder = builder.with_batch_exporter(exporter); - } - OtelExporter::OtlpHttp { - endpoint, - headers, - protocol, - tls, - } => { - debug!("Using OTLP Http exporter: {endpoint}"); - - let protocol = match protocol { - OtelHttpProtocol::Binary => Protocol::HttpBinary, - OtelHttpProtocol::Json => Protocol::HttpJson, - }; - - let mut exporter_builder = LogExporter::builder() - .with_http() - .with_endpoint(endpoint) - .with_protocol(protocol) - .with_headers(headers); - - if let Some(tls) = tls.as_ref() { - let client = crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_LOGS_TIMEOUT)?; - exporter_builder = exporter_builder.with_http_client(client); - } - - let exporter = exporter_builder.build()?; - - builder = builder.with_batch_exporter(exporter); - } - } - - Ok(builder.build()) -} - -fn build_tracer_provider( - resource: &Resource, - exporter: &OtelExporter, -) -> Result> { - let span_exporter = match crate::config::resolve_exporter(exporter) { - OtelExporter::None => return Ok(SdkTracerProvider::builder().build()), - OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), - OtelExporter::OtlpGrpc { - endpoint, - headers, - tls, - } => { - debug!("Using OTLP Grpc exporter for traces: {endpoint}"); - - let header_map = crate::otlp::build_header_map(&headers); - - let base_tls_config = ClientTlsConfig::new() - .with_enabled_roots() - .assume_http2(true); - - let tls_config = match tls.as_ref() { - Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?, - None => base_tls_config, - }; - - SpanExporter::builder() - .with_tonic() - .with_endpoint(endpoint) - .with_metadata(MetadataMap::from_headers(header_map)) - .with_tls_config(tls_config) - .build()? - } - OtelExporter::OtlpHttp { - endpoint, - headers, - protocol, - tls, - } => { - debug!("Using OTLP Http exporter for traces: {endpoint}"); - - let protocol = match protocol { - OtelHttpProtocol::Binary => Protocol::HttpBinary, - OtelHttpProtocol::Json => Protocol::HttpJson, - }; - - let mut exporter_builder = SpanExporter::builder() - .with_http() - .with_endpoint(endpoint) - .with_protocol(protocol) - .with_headers(headers); - - if let Some(tls) = tls.as_ref() { - let client = - crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_TRACES_TIMEOUT)?; - exporter_builder = exporter_builder.with_http_client(client); - } - - exporter_builder.build()? - } - }; - - let processor = BatchSpanProcessor::builder(span_exporter).build(); - - Ok(SdkTracerProvider::builder() - .with_resource(resource.clone()) - .with_span_processor(processor) - .build()) -} - -#[cfg(test)] -mod tests { - use super::*; - use opentelemetry::trace::SpanId; - use opentelemetry::trace::TraceContextExt; - use opentelemetry::trace::TraceId; - use pretty_assertions::assert_eq; - use std::path::PathBuf; - - #[test] - fn parses_valid_traceparent() { - let trace_id = "00000000000000000000000000000001"; - let span_id = "0000000000000002"; - let context = - context_from_trace_headers(Some(&format!("00-{trace_id}-{span_id}-01")), None) - .expect("trace context"); - let span = context.span(); - let span_context = span.span_context(); - assert_eq!( - span_context.trace_id(), - TraceId::from_hex(trace_id).unwrap() - ); - assert_eq!(span_context.span_id(), SpanId::from_hex(span_id).unwrap()); - assert!(span_context.is_remote()); - } - - #[test] - fn invalid_traceparent_returns_none() { - assert!(context_from_trace_headers(Some("not-a-traceparent"), None).is_none()); - } - - #[test] - fn resource_attributes_include_host_name_when_present() { - let attrs = resource_attributes( - &test_otel_settings(), - Some("opentelemetry-test"), - ResourceKind::Logs, - ); - - let host_name = attrs - .iter() - .find(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) - .map(|kv| kv.value.as_str().to_string()); - - assert_eq!(host_name, Some("opentelemetry-test".to_string())); - } - - #[test] - fn resource_attributes_omit_host_name_when_missing_or_empty() { - let missing = resource_attributes(&test_otel_settings(), None, ResourceKind::Logs); - let empty = resource_attributes(&test_otel_settings(), Some(" "), ResourceKind::Logs); - let trace_attrs = resource_attributes( - &test_otel_settings(), - Some("opentelemetry-test"), - ResourceKind::Traces, - ); - - assert!( - !missing - .iter() - .any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) - ); - assert!( - !empty - .iter() - .any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) - ); - assert!( - !trace_attrs - .iter() - .any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) - ); - } - - #[test] - fn log_export_target_excludes_trace_safe_events() { - assert!(is_log_export_target("codex_otel.log_only")); - assert!(is_log_export_target("codex_otel.network_proxy")); - assert!(!is_log_export_target("codex_otel.trace_safe")); - assert!(!is_log_export_target("codex_otel.trace_safe.debug")); - } - - #[test] - fn trace_export_target_only_includes_trace_safe_prefix() { - assert!(is_trace_safe_target("codex_otel.trace_safe")); - assert!(is_trace_safe_target("codex_otel.trace_safe.summary")); - assert!(!is_trace_safe_target("codex_otel.log_only")); - assert!(!is_trace_safe_target("codex_otel.network_proxy")); - } - - fn test_otel_settings() -> OtelSettings { - OtelSettings { - environment: "test".to_string(), - service_name: "codex-test".to_string(), - service_version: "0.0.0".to_string(), - codex_home: PathBuf::from("."), - exporter: OtelExporter::None, - trace_exporter: OtelExporter::None, - metrics_exporter: OtelExporter::None, - runtime_metrics: false, - } - } -} +pub use crate::provider::*; +pub use crate::trace_context::traceparent_context_from_env; diff --git a/codex-rs/otel/src/provider.rs b/codex-rs/otel/src/provider.rs new file mode 100644 index 000000000..dad09156a --- /dev/null +++ b/codex-rs/otel/src/provider.rs @@ -0,0 +1,430 @@ +use crate::config::OtelExporter; +use crate::config::OtelHttpProtocol; +use crate::config::OtelSettings; +use crate::metrics::MetricsClient; +use crate::metrics::MetricsConfig; +use crate::targets::is_log_export_target; +use crate::targets::is_trace_safe_target; +use gethostname::gethostname; +use opentelemetry::KeyValue; +use opentelemetry::global; +use opentelemetry::trace::TracerProvider as _; +use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge; +use opentelemetry_otlp::LogExporter; +use opentelemetry_otlp::OTEL_EXPORTER_OTLP_LOGS_TIMEOUT; +use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TRACES_TIMEOUT; +use opentelemetry_otlp::Protocol; +use opentelemetry_otlp::SpanExporter; +use opentelemetry_otlp::WithExportConfig; +use opentelemetry_otlp::WithHttpConfig; +use opentelemetry_otlp::WithTonicConfig; +use opentelemetry_otlp::tonic_types::metadata::MetadataMap; +use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig; +use opentelemetry_sdk::Resource; +use opentelemetry_sdk::logs::SdkLoggerProvider; +use opentelemetry_sdk::propagation::TraceContextPropagator; +use opentelemetry_sdk::trace::BatchSpanProcessor; +use opentelemetry_sdk::trace::SdkTracerProvider; +use opentelemetry_sdk::trace::Tracer; +use opentelemetry_semantic_conventions as semconv; +use std::error::Error; +use tracing::debug; +use tracing_subscriber::Layer; +use tracing_subscriber::registry::LookupSpan; + +const ENV_ATTRIBUTE: &str = "env"; +const HOST_NAME_ATTRIBUTE: &str = "host.name"; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum ResourceKind { + Logs, + Traces, +} + +pub struct OtelProvider { + pub logger: Option, + pub tracer_provider: Option, + pub tracer: Option, + pub metrics: Option, +} + +impl OtelProvider { + pub fn shutdown(&self) { + if let Some(logger) = &self.logger { + let _ = logger.shutdown(); + } + if let Some(tracer_provider) = &self.tracer_provider { + let _ = tracer_provider.shutdown(); + } + if let Some(metrics) = &self.metrics { + let _ = metrics.shutdown(); + } + } + + pub fn from(settings: &OtelSettings) -> Result, Box> { + let log_enabled = !matches!(settings.exporter, OtelExporter::None); + let trace_enabled = !matches!(settings.trace_exporter, OtelExporter::None); + + let metric_exporter = crate::config::resolve_exporter(&settings.metrics_exporter); + let metrics = if matches!(metric_exporter, OtelExporter::None) { + None + } else { + let mut config = MetricsConfig::otlp( + settings.environment.clone(), + settings.service_name.clone(), + settings.service_version.clone(), + metric_exporter, + ); + if settings.runtime_metrics { + config = config.with_runtime_reader(); + } + Some(MetricsClient::new(config)?) + }; + + if let Some(metrics) = metrics.as_ref() { + crate::metrics::install_global(metrics.clone()); + } + + if !log_enabled && !trace_enabled && metrics.is_none() { + debug!("No OTEL exporter enabled in settings."); + return Ok(None); + } + + let log_resource = make_resource(settings, ResourceKind::Logs); + let trace_resource = make_resource(settings, ResourceKind::Traces); + let logger = log_enabled + .then(|| build_logger(&log_resource, &settings.exporter)) + .transpose()?; + + let tracer_provider = trace_enabled + .then(|| build_tracer_provider(&trace_resource, &settings.trace_exporter)) + .transpose()?; + + let tracer = tracer_provider + .as_ref() + .map(|provider| provider.tracer(settings.service_name.clone())); + + if let Some(provider) = tracer_provider.clone() { + global::set_tracer_provider(provider); + global::set_text_map_propagator(TraceContextPropagator::new()); + } + Ok(Some(Self { + logger, + tracer_provider, + tracer, + metrics, + })) + } + + pub fn logger_layer(&self) -> Option + Send + Sync> + where + S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync, + { + self.logger.as_ref().map(|logger| { + OpenTelemetryTracingBridge::new(logger).with_filter( + tracing_subscriber::filter::filter_fn(OtelProvider::log_export_filter), + ) + }) + } + + pub fn tracing_layer(&self) -> Option + Send + Sync> + where + S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync, + { + self.tracer.as_ref().map(|tracer| { + tracing_opentelemetry::layer() + .with_tracer(tracer.clone()) + .with_filter(tracing_subscriber::filter::filter_fn( + OtelProvider::trace_export_filter, + )) + }) + } + + pub fn codex_export_filter(meta: &tracing::Metadata<'_>) -> bool { + Self::log_export_filter(meta) + } + + pub fn log_export_filter(meta: &tracing::Metadata<'_>) -> bool { + is_log_export_target(meta.target()) + } + + pub fn trace_export_filter(meta: &tracing::Metadata<'_>) -> bool { + meta.is_span() || is_trace_safe_target(meta.target()) + } + + pub fn metrics(&self) -> Option<&MetricsClient> { + self.metrics.as_ref() + } +} + +impl Drop for OtelProvider { + fn drop(&mut self) { + if let Some(logger) = &self.logger { + let _ = logger.shutdown(); + } + if let Some(tracer_provider) = &self.tracer_provider { + let _ = tracer_provider.shutdown(); + } + if let Some(metrics) = &self.metrics { + let _ = metrics.shutdown(); + } + } +} + +fn make_resource(settings: &OtelSettings, kind: ResourceKind) -> Resource { + Resource::builder() + .with_service_name(settings.service_name.clone()) + .with_attributes(resource_attributes( + settings, + detected_host_name().as_deref(), + kind, + )) + .build() +} + +fn resource_attributes( + settings: &OtelSettings, + host_name: Option<&str>, + kind: ResourceKind, +) -> Vec { + let mut attributes = vec![ + KeyValue::new( + semconv::attribute::SERVICE_VERSION, + settings.service_version.clone(), + ), + KeyValue::new(ENV_ATTRIBUTE, settings.environment.clone()), + ]; + if kind == ResourceKind::Logs + && let Some(host_name) = host_name.and_then(normalize_host_name) + { + attributes.push(KeyValue::new(HOST_NAME_ATTRIBUTE, host_name)); + } + attributes +} + +fn detected_host_name() -> Option { + let host_name = gethostname(); + normalize_host_name(host_name.to_string_lossy().as_ref()) +} + +fn normalize_host_name(host_name: &str) -> Option { + let host_name = host_name.trim(); + (!host_name.is_empty()).then(|| host_name.to_owned()) +} + +fn build_logger( + resource: &Resource, + exporter: &OtelExporter, +) -> Result> { + let mut builder = SdkLoggerProvider::builder().with_resource(resource.clone()); + + match crate::config::resolve_exporter(exporter) { + OtelExporter::None => return Ok(builder.build()), + OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), + OtelExporter::OtlpGrpc { + endpoint, + headers, + tls, + } => { + debug!("Using OTLP Grpc exporter: {endpoint}"); + + let header_map = crate::otlp::build_header_map(&headers); + + let base_tls_config = ClientTlsConfig::new() + .with_enabled_roots() + .assume_http2(true); + + let tls_config = match tls.as_ref() { + Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?, + None => base_tls_config, + }; + + let exporter = LogExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .with_metadata(MetadataMap::from_headers(header_map)) + .with_tls_config(tls_config) + .build()?; + + builder = builder.with_batch_exporter(exporter); + } + OtelExporter::OtlpHttp { + endpoint, + headers, + protocol, + tls, + } => { + debug!("Using OTLP Http exporter: {endpoint}"); + + let protocol = match protocol { + OtelHttpProtocol::Binary => Protocol::HttpBinary, + OtelHttpProtocol::Json => Protocol::HttpJson, + }; + + let mut exporter_builder = LogExporter::builder() + .with_http() + .with_endpoint(endpoint) + .with_protocol(protocol) + .with_headers(headers); + + if let Some(tls) = tls.as_ref() { + let client = crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_LOGS_TIMEOUT)?; + exporter_builder = exporter_builder.with_http_client(client); + } + + let exporter = exporter_builder.build()?; + + builder = builder.with_batch_exporter(exporter); + } + } + + Ok(builder.build()) +} + +fn build_tracer_provider( + resource: &Resource, + exporter: &OtelExporter, +) -> Result> { + let span_exporter = match crate::config::resolve_exporter(exporter) { + OtelExporter::None => return Ok(SdkTracerProvider::builder().build()), + OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"), + OtelExporter::OtlpGrpc { + endpoint, + headers, + tls, + } => { + debug!("Using OTLP Grpc exporter for traces: {endpoint}"); + + let header_map = crate::otlp::build_header_map(&headers); + + let base_tls_config = ClientTlsConfig::new() + .with_enabled_roots() + .assume_http2(true); + + let tls_config = match tls.as_ref() { + Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?, + None => base_tls_config, + }; + + SpanExporter::builder() + .with_tonic() + .with_endpoint(endpoint) + .with_metadata(MetadataMap::from_headers(header_map)) + .with_tls_config(tls_config) + .build()? + } + OtelExporter::OtlpHttp { + endpoint, + headers, + protocol, + tls, + } => { + debug!("Using OTLP Http exporter for traces: {endpoint}"); + + let protocol = match protocol { + OtelHttpProtocol::Binary => Protocol::HttpBinary, + OtelHttpProtocol::Json => Protocol::HttpJson, + }; + + let mut exporter_builder = SpanExporter::builder() + .with_http() + .with_endpoint(endpoint) + .with_protocol(protocol) + .with_headers(headers); + + if let Some(tls) = tls.as_ref() { + let client = + crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_TRACES_TIMEOUT)?; + exporter_builder = exporter_builder.with_http_client(client); + } + + exporter_builder.build()? + } + }; + + let processor = BatchSpanProcessor::builder(span_exporter).build(); + + Ok(SdkTracerProvider::builder() + .with_resource(resource.clone()) + .with_span_processor(processor) + .build()) +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + use std::path::PathBuf; + + #[test] + fn resource_attributes_include_host_name_when_present() { + let attrs = resource_attributes( + &test_otel_settings(), + Some("opentelemetry-test"), + ResourceKind::Logs, + ); + + let host_name = attrs + .iter() + .find(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) + .map(|kv| kv.value.as_str().to_string()); + + assert_eq!(host_name, Some("opentelemetry-test".to_string())); + } + + #[test] + fn resource_attributes_omit_host_name_when_missing_or_empty() { + let missing = resource_attributes(&test_otel_settings(), None, ResourceKind::Logs); + let empty = resource_attributes(&test_otel_settings(), Some(" "), ResourceKind::Logs); + let trace_attrs = resource_attributes( + &test_otel_settings(), + Some("opentelemetry-test"), + ResourceKind::Traces, + ); + + assert!( + !missing + .iter() + .any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) + ); + assert!( + !empty + .iter() + .any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) + ); + assert!( + !trace_attrs + .iter() + .any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE) + ); + } + + #[test] + fn log_export_target_excludes_trace_safe_events() { + assert!(is_log_export_target("codex_otel.log_only")); + assert!(is_log_export_target("codex_otel.network_proxy")); + assert!(!is_log_export_target("codex_otel.trace_safe")); + assert!(!is_log_export_target("codex_otel.trace_safe.debug")); + } + + #[test] + fn trace_export_target_only_includes_trace_safe_prefix() { + assert!(is_trace_safe_target("codex_otel.trace_safe")); + assert!(is_trace_safe_target("codex_otel.trace_safe.summary")); + assert!(!is_trace_safe_target("codex_otel.log_only")); + assert!(!is_trace_safe_target("codex_otel.network_proxy")); + } + + fn test_otel_settings() -> OtelSettings { + OtelSettings { + environment: "test".to_string(), + service_name: "codex-test".to_string(), + service_version: "0.0.0".to_string(), + codex_home: PathBuf::from("."), + exporter: OtelExporter::None, + trace_exporter: OtelExporter::None, + metrics_exporter: OtelExporter::None, + runtime_metrics: false, + } + } +} diff --git a/codex-rs/otel/src/targets.rs b/codex-rs/otel/src/targets.rs new file mode 100644 index 000000000..765c70156 --- /dev/null +++ b/codex-rs/otel/src/targets.rs @@ -0,0 +1,11 @@ +pub(crate) const OTEL_TARGET_PREFIX: &str = "codex_otel"; +pub(crate) const OTEL_LOG_ONLY_TARGET: &str = "codex_otel.log_only"; +pub(crate) const OTEL_TRACE_SAFE_TARGET: &str = "codex_otel.trace_safe"; + +pub(crate) fn is_log_export_target(target: &str) -> bool { + target.starts_with(OTEL_TARGET_PREFIX) && !is_trace_safe_target(target) +} + +pub(crate) fn is_trace_safe_target(target: &str) -> bool { + target.starts_with(OTEL_TRACE_SAFE_TARGET) +} diff --git a/codex-rs/otel/src/trace_context.rs b/codex-rs/otel/src/trace_context.rs index f3ce0dd52..913bbb205 100644 --- a/codex-rs/otel/src/trace_context.rs +++ b/codex-rs/otel/src/trace_context.rs @@ -1,4 +1,6 @@ use std::collections::HashMap; +use std::env; +use std::sync::OnceLock; use codex_protocol::protocol::W3cTraceContext; use opentelemetry::Context; @@ -6,8 +8,14 @@ use opentelemetry::propagation::TextMapPropagator; use opentelemetry::trace::TraceContextExt; use opentelemetry_sdk::propagation::TraceContextPropagator; use tracing::Span; +use tracing::debug; +use tracing::warn; use tracing_opentelemetry::OpenTelemetrySpanExt; +const TRACEPARENT_ENV_VAR: &str = "TRACEPARENT"; +const TRACESTATE_ENV_VAR: &str = "TRACESTATE"; +static TRACEPARENT_CONTEXT: OnceLock> = OnceLock::new(); + pub fn current_span_w3c_trace_context() -> Option { let context = Span::current().context(); if !context.span().span_context().is_valid() { @@ -51,6 +59,12 @@ pub fn set_parent_from_context(span: &Span, context: Context) { let _ = span.set_parent(context); } +pub fn traceparent_context_from_env() -> Option { + TRACEPARENT_CONTEXT + .get_or_init(load_traceparent_context) + .clone() +} + pub(crate) fn context_from_trace_headers( traceparent: Option<&str>, tracestate: Option<&str>, @@ -69,6 +83,22 @@ pub(crate) fn context_from_trace_headers( Some(context) } +fn load_traceparent_context() -> Option { + let traceparent = env::var(TRACEPARENT_ENV_VAR).ok()?; + let tracestate = env::var(TRACESTATE_ENV_VAR).ok(); + + match context_from_trace_headers(Some(&traceparent), tracestate.as_deref()) { + Some(context) => { + debug!("TRACEPARENT detected; continuing trace from parent context"); + Some(context) + } + None => { + warn!("TRACEPARENT is set but invalid; ignoring trace context"); + None + } + } +} + #[cfg(test)] mod tests { use super::context_from_trace_headers; diff --git a/codex-rs/otel/src/traces/mod.rs b/codex-rs/otel/src/traces/mod.rs deleted file mode 100644 index eca5bff16..000000000 --- a/codex-rs/otel/src/traces/mod.rs +++ /dev/null @@ -1 +0,0 @@ -pub mod otel_manager;