chore(otel): reorganize codex-otel crate (#13800)

## Summary
This is a structural cleanup of `codex-otel` to make the ownership
boundaries a lot clearer.

For example, previously it was quite confusing that `OtelManager` which
emits log + trace event telemetry lived under
`codex-rs/otel/src/traces/`. Also, there were two places that defined
methods on OtelManager via `impl OtelManager` (`lib.rs` and
`otel_manager.rs`).

What changed:
- move the `OtelProvider` implementation into `src/provider.rs`
- move `OtelManager` and session-scoped event emission into
`src/events/otel_manager.rs`
- collapse the shared log/trace event helpers into
`src/events/shared.rs`
- pull target classification into `src/targets.rs`
- move `traceparent_context_from_env()` into `src/trace_context.rs`
- keep `src/otel_provider.rs` as a compatibility shim for existing
imports
- update the `codex-otel` README to reflect the new layout

## Why
`lib.rs` and `otel_provider.rs` were doing too many different jobs at
once: provider setup, export routing, trace-context helpers, and session
event emission all lived together.

This refactor separates those concerns without trying to change the
behavior of the crate. The goal is to make future OTEL work easier to
reason about and easier to review.

## Notes
- no intended behavior change
- `OtelManager` remains the session-scoped event emitter in this PR
- the `otel_provider` shim keeps downstream churn low while the
internals move around

## Validation
- `just fmt`
- `cargo test -p codex-otel`
- `just fix -p codex-otel`
This commit is contained in:
Owen Lin 2026-03-06 14:58:18 -08:00 committed by GitHub
parent 8ede18011a
commit dd4a5216c9
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 782 additions and 813 deletions

View file

@ -2,10 +2,11 @@
`codex-otel` is the OpenTelemetry integration crate for Codex. It provides:
- Trace/log/metrics exporters and tracing subscriber layers (`codex_otel::otel_provider`).
- A structured event helper (`codex_otel::OtelManager`).
- OpenTelemetry metrics support via OTLP exporters (`codex_otel::metrics`).
- A metrics facade on `OtelManager` so tracing + metrics share metadata.
- Provider wiring for log/trace/metric exporters (`codex_otel::OtelProvider`,
`codex_otel::provider`, and the compatibility shim `codex_otel::otel_provider`).
- Session-scoped business event emission via `codex_otel::OtelManager`.
- Low-level metrics APIs via `codex_otel::metrics`.
- Trace-context helpers via `codex_otel::trace_context` and crate-root re-exports.
## Tracing and logs
@ -17,7 +18,7 @@ registry:
use codex_otel::config::OtelExporter;
use codex_otel::config::OtelHttpProtocol;
use codex_otel::config::OtelSettings;
use codex_otel::otel_provider::OtelProvider;
use codex_otel::OtelProvider;
use tracing_subscriber::prelude::*;
let settings = OtelSettings {
@ -51,7 +52,8 @@ if let Some(provider) = OtelProvider::from(&settings)? {
## OtelManager (events)
`OtelManager` adds consistent metadata to tracing events and helps record
Codex-specific events.
Codex-specific session events. Rich session/business events should go through
`OtelManager`; subsystem-owned audit events can stay with the owning subsystem.
```rust
use codex_otel::OtelManager;
@ -63,6 +65,7 @@ let manager = OtelManager::new(
account_id,
account_email,
auth_mode,
originator,
log_user_prompts,
terminal_type,
session_source,
@ -119,6 +122,15 @@ metrics.counter("codex.turns", 1, &[("model", "gpt-5.1")])?;
metrics.shutdown()?; // flushes in-memory exporter
```
## Trace context
Trace propagation helpers remain separate from the session event emitter:
```rust
use codex_otel::current_span_w3c_trace_context;
use codex_otel::set_parent_from_w3c_trace_context;
```
## Shutdown
- `OtelProvider::shutdown()` stops the OTEL exporter.

View file

@ -0,0 +1,2 @@
pub(crate) mod otel_manager;
pub(crate) mod shared;

View file

@ -1,6 +1,12 @@
use crate::OTEL_LOG_ONLY_TARGET;
use crate::OTEL_TRACE_SAFE_TARGET;
use crate::TelemetryAuthMode;
use crate::ToolDecisionSource;
use crate::events::shared::log_and_trace_event;
use crate::events::shared::log_event;
use crate::events::shared::trace_event;
use crate::metrics::MetricsClient;
use crate::metrics::MetricsConfig;
use crate::metrics::MetricsError;
use crate::metrics::Result as MetricsResult;
use crate::metrics::names::API_CALL_COUNT_METRIC;
use crate::metrics::names::API_CALL_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC;
@ -17,9 +23,12 @@ use crate::metrics::names::WEBSOCKET_EVENT_COUNT_METRIC;
use crate::metrics::names::WEBSOCKET_EVENT_DURATION_METRIC;
use crate::metrics::names::WEBSOCKET_REQUEST_COUNT_METRIC;
use crate::metrics::names::WEBSOCKET_REQUEST_DURATION_METRIC;
use crate::metrics::runtime_metrics::RuntimeMetricsSummary;
use crate::metrics::timer::Timer;
use crate::metrics::validation::validate_tag_key;
use crate::metrics::validation::validate_tag_value;
use crate::provider::OtelProvider;
use crate::sanitize_metric_tag_value;
use chrono::SecondsFormat;
use chrono::Utc;
use codex_api::ApiError;
use codex_api::ResponseEvent;
use codex_protocol::ThreadId;
@ -33,20 +42,16 @@ use codex_protocol::protocol::SessionSource;
use codex_protocol::user_input::UserInput;
use eventsource_stream::Event as StreamEvent;
use eventsource_stream::EventStreamError as StreamError;
use opentelemetry_sdk::metrics::data::ResourceMetrics;
use reqwest::Error;
use reqwest::Response;
use std::borrow::Cow;
use std::fmt::Display;
use std::future::Future;
use std::time::Duration;
use std::time::Instant;
use tokio::time::error::Elapsed;
use tracing::Span;
pub use crate::OtelEventMetadata;
pub use crate::OtelManager;
pub use crate::ToolDecisionSource;
const SSE_UNKNOWN_KIND: &str = "unknown";
const WEBSOCKET_UNKNOWN_KIND: &str = "unknown";
const RESPONSES_WEBSOCKET_TIMING_KIND: &str = "responsesapi.websocket_timing";
@ -58,85 +63,207 @@ const RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD: &str = "engine_service_ttft_total
const RESPONSES_API_ENGINE_IAPI_TBT_FIELD: &str = "engine_iapi_tbt_across_engine_calls_ms";
const RESPONSES_API_ENGINE_SERVICE_TBT_FIELD: &str = "engine_service_tbt_across_engine_calls_ms";
macro_rules! log_event {
($self:expr, $($fields:tt)*) => {{
tracing::event!(
target: OTEL_LOG_ONLY_TARGET,
tracing::Level::INFO,
$($fields)*
event.timestamp = %timestamp(),
conversation.id = %$self.metadata.conversation_id,
app.version = %$self.metadata.app_version,
auth_mode = $self.metadata.auth_mode,
originator = %$self.metadata.originator,
user.account_id = $self.metadata.account_id,
user.email = $self.metadata.account_email,
terminal.type = %$self.metadata.terminal_type,
model = %$self.metadata.model,
slug = %$self.metadata.slug,
);
}};
#[derive(Debug, Clone)]
pub struct OtelEventMetadata {
pub(crate) conversation_id: ThreadId,
pub(crate) auth_mode: Option<String>,
pub(crate) account_id: Option<String>,
pub(crate) account_email: Option<String>,
pub(crate) originator: String,
pub(crate) service_name: Option<String>,
pub(crate) session_source: String,
pub(crate) model: String,
pub(crate) slug: String,
pub(crate) log_user_prompts: bool,
pub(crate) app_version: &'static str,
pub(crate) terminal_type: String,
}
macro_rules! trace_event {
($self:expr, $($fields:tt)*) => {{
tracing::event!(
target: OTEL_TRACE_SAFE_TARGET,
tracing::Level::INFO,
$($fields)*
event.timestamp = %timestamp(),
conversation.id = %$self.metadata.conversation_id,
app.version = %$self.metadata.app_version,
auth_mode = $self.metadata.auth_mode,
originator = %$self.metadata.originator,
terminal.type = %$self.metadata.terminal_type,
model = %$self.metadata.model,
slug = %$self.metadata.slug,
);
}};
}
macro_rules! log_and_trace_event {
(
$self:expr,
common: { $($common:tt)* },
log: { $($log:tt)* },
trace: { $($trace:tt)* },
) => {{
tracing::event!(
target: OTEL_LOG_ONLY_TARGET,
tracing::Level::INFO,
$($common)*
$($log)*
event.timestamp = %timestamp(),
conversation.id = %$self.metadata.conversation_id,
app.version = %$self.metadata.app_version,
auth_mode = $self.metadata.auth_mode,
originator = %$self.metadata.originator,
user.account_id = $self.metadata.account_id,
user.email = $self.metadata.account_email,
terminal.type = %$self.metadata.terminal_type,
model = %$self.metadata.model,
slug = %$self.metadata.slug,
);
tracing::event!(
target: OTEL_TRACE_SAFE_TARGET,
tracing::Level::INFO,
$($common)*
$($trace)*
event.timestamp = %timestamp(),
conversation.id = %$self.metadata.conversation_id,
app.version = %$self.metadata.app_version,
auth_mode = $self.metadata.auth_mode,
originator = %$self.metadata.originator,
terminal.type = %$self.metadata.terminal_type,
model = %$self.metadata.model,
slug = %$self.metadata.slug,
);
}};
#[derive(Debug, Clone)]
pub struct OtelManager {
pub(crate) metadata: OtelEventMetadata,
pub(crate) metrics: Option<MetricsClient>,
pub(crate) metrics_use_metadata_tags: bool,
}
impl OtelManager {
pub fn with_model(mut self, model: &str, slug: &str) -> Self {
self.metadata.model = model.to_owned();
self.metadata.slug = slug.to_owned();
self
}
pub fn with_metrics_service_name(mut self, service_name: &str) -> Self {
self.metadata.service_name = Some(sanitize_metric_tag_value(service_name));
self
}
pub fn with_metrics(mut self, metrics: MetricsClient) -> Self {
self.metrics = Some(metrics);
self.metrics_use_metadata_tags = true;
self
}
pub fn with_metrics_without_metadata_tags(mut self, metrics: MetricsClient) -> Self {
self.metrics = Some(metrics);
self.metrics_use_metadata_tags = false;
self
}
pub fn with_metrics_config(self, config: MetricsConfig) -> MetricsResult<Self> {
let metrics = MetricsClient::new(config)?;
Ok(self.with_metrics(metrics))
}
pub fn with_provider_metrics(self, provider: &OtelProvider) -> Self {
match provider.metrics() {
Some(metrics) => self.with_metrics(metrics.clone()),
None => self,
}
}
pub fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.counter(name, inc, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics counter [{name}] failed: {e}");
}
}
pub fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.histogram(name, value, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics histogram [{name}] failed: {e}");
}
}
pub fn record_duration(&self, name: &str, duration: Duration, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.record_duration(name, duration, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics duration [{name}] failed: {e}");
}
}
pub fn start_timer(&self, name: &str, tags: &[(&str, &str)]) -> Result<Timer, MetricsError> {
let Some(metrics) = &self.metrics else {
return Err(MetricsError::ExporterDisabled);
};
let tags = self.tags_with_metadata(tags)?;
metrics.start_timer(name, &tags)
}
pub fn shutdown_metrics(&self) -> MetricsResult<()> {
let Some(metrics) = &self.metrics else {
return Ok(());
};
metrics.shutdown()
}
pub fn snapshot_metrics(&self) -> MetricsResult<ResourceMetrics> {
let Some(metrics) = &self.metrics else {
return Err(MetricsError::ExporterDisabled);
};
metrics.snapshot()
}
/// Collect and discard a runtime metrics snapshot to reset delta accumulators.
pub fn reset_runtime_metrics(&self) {
if self.metrics.is_none() {
return;
}
if let Err(err) = self.snapshot_metrics() {
tracing::debug!("runtime metrics reset skipped: {err}");
}
}
/// Collect a runtime metrics summary if debug snapshots are available.
pub fn runtime_metrics_summary(&self) -> Option<RuntimeMetricsSummary> {
let snapshot = match self.snapshot_metrics() {
Ok(snapshot) => snapshot,
Err(_) => {
return None;
}
};
let summary = RuntimeMetricsSummary::from_snapshot(&snapshot);
if summary.is_empty() {
None
} else {
Some(summary)
}
}
fn tags_with_metadata<'a>(
&'a self,
tags: &'a [(&'a str, &'a str)],
) -> MetricsResult<Vec<(&'a str, &'a str)>> {
let mut merged = self.metadata_tag_refs()?;
merged.extend(tags.iter().copied());
Ok(merged)
}
fn metadata_tag_refs(&self) -> MetricsResult<Vec<(&str, &str)>> {
if !self.metrics_use_metadata_tags {
return Ok(Vec::new());
}
let mut tags = Vec::with_capacity(7);
Self::push_metadata_tag(&mut tags, "auth_mode", self.metadata.auth_mode.as_deref())?;
Self::push_metadata_tag(
&mut tags,
"session_source",
Some(self.metadata.session_source.as_str()),
)?;
Self::push_metadata_tag(
&mut tags,
"originator",
Some(self.metadata.originator.as_str()),
)?;
Self::push_metadata_tag(
&mut tags,
"service_name",
self.metadata.service_name.as_deref(),
)?;
Self::push_metadata_tag(&mut tags, "model", Some(self.metadata.model.as_str()))?;
Self::push_metadata_tag(&mut tags, "app.version", Some(self.metadata.app_version))?;
Ok(tags)
}
fn push_metadata_tag<'a>(
tags: &mut Vec<(&'a str, &'a str)>,
key: &'static str,
value: Option<&'a str>,
) -> MetricsResult<()> {
let Some(value) = value else {
return Ok(());
};
validate_tag_key(key)?;
validate_tag_value(value)?;
tags.push((key, value));
Ok(())
}
#[allow(clippy::too_many_arguments)]
pub fn new(
conversation_id: ThreadId,
@ -176,13 +303,13 @@ impl OtelManager {
match event {
ResponseEvent::OutputItemDone(item) => {
handle_responses_span.record("from", "output_item_done");
if let ResponseItem::FunctionCall { name, .. } = &item {
if let ResponseItem::FunctionCall { name, .. } = item {
handle_responses_span.record("tool_name", name.as_str());
}
}
ResponseEvent::OutputItemAdded(item) => {
handle_responses_span.record("from", "output_item_added");
if let ResponseItem::FunctionCall { name, .. } = &item {
if let ResponseItem::FunctionCall { name, .. } = item {
handle_responses_span.record("tool_name", name.as_str());
}
}
@ -406,7 +533,7 @@ impl OtelManager {
response: &Result<Option<Result<StreamEvent, StreamError<E>>>, Elapsed>,
duration: Duration,
) where
E: Display,
E: std::fmt::Display,
{
match response {
Ok(Some(Ok(sse))) => {
@ -469,7 +596,7 @@ impl OtelManager {
pub fn sse_event_failed<T>(&self, kind: Option<&String>, duration: Duration, error: &T)
where
T: Display,
T: std::fmt::Display,
{
let kind_str = kind.map_or(SSE_UNKNOWN_KIND, String::as_str);
self.counter(
@ -508,7 +635,7 @@ impl OtelManager {
pub fn see_event_completed_failed<T>(&self, error: &T)
where
T: Display,
T: std::fmt::Display,
{
log_and_trace_event!(
self,
@ -620,7 +747,7 @@ impl OtelManager {
where
F: FnOnce() -> Fut,
Fut: Future<Output = Result<(String, bool), E>>,
E: Display,
E: std::fmt::Display,
{
let start = Instant::now();
let result = f().await;
@ -774,8 +901,9 @@ impl OtelManager {
fn responses_type(event: &ResponseEvent) -> String {
match event {
ResponseEvent::Created => "created".into(),
ResponseEvent::OutputItemDone(item) => OtelManager::responses_item_type(item),
ResponseEvent::OutputItemAdded(item) => OtelManager::responses_item_type(item),
ResponseEvent::OutputItemDone(item) | ResponseEvent::OutputItemAdded(item) => {
OtelManager::responses_item_type(item)
}
ResponseEvent::Completed { .. } => "completed".into(),
ResponseEvent::OutputTextDelta(_) => "text_delta".into(),
ResponseEvent::ReasoningSummaryDelta { .. } => "reasoning_summary_delta".into(),
@ -808,10 +936,6 @@ impl OtelManager {
}
}
fn timestamp() -> String {
Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true)
}
fn duration_from_ms_value(value: Option<&serde_json::Value>) -> Option<Duration> {
let value = value?;
let ms = value

View file

@ -0,0 +1,60 @@
use chrono::SecondsFormat;
use chrono::Utc;
macro_rules! log_event {
($self:expr, $($fields:tt)*) => {{
tracing::event!(
target: $crate::targets::OTEL_LOG_ONLY_TARGET,
tracing::Level::INFO,
$($fields)*
event.timestamp = %$crate::events::shared::timestamp(),
conversation.id = %$self.metadata.conversation_id,
app.version = %$self.metadata.app_version,
auth_mode = $self.metadata.auth_mode,
originator = %$self.metadata.originator,
user.account_id = $self.metadata.account_id,
user.email = $self.metadata.account_email,
terminal.type = %$self.metadata.terminal_type,
model = %$self.metadata.model,
slug = %$self.metadata.slug,
);
}};
}
macro_rules! trace_event {
($self:expr, $($fields:tt)*) => {{
tracing::event!(
target: $crate::targets::OTEL_TRACE_SAFE_TARGET,
tracing::Level::INFO,
$($fields)*
event.timestamp = %$crate::events::shared::timestamp(),
conversation.id = %$self.metadata.conversation_id,
app.version = %$self.metadata.app_version,
auth_mode = $self.metadata.auth_mode,
originator = %$self.metadata.originator,
terminal.type = %$self.metadata.terminal_type,
model = %$self.metadata.model,
slug = %$self.metadata.slug,
);
}};
}
macro_rules! log_and_trace_event {
(
$self:expr,
common: { $($common:tt)* },
log: { $($log:tt)* },
trace: { $($trace:tt)* },
) => {{
log_event!($self, $($common)* $($log)*);
trace_event!($self, $($common)* $($trace)*);
}};
}
pub(crate) use log_and_trace_event;
pub(crate) use log_event;
pub(crate) use trace_event;
pub(crate) fn timestamp() -> String {
Utc::now().to_rfc3339_opts(SecondsFormat::Millis, true)
}

View file

@ -1,39 +1,31 @@
pub mod config;
mod events;
pub mod metrics;
pub mod otel_provider;
pub mod provider;
pub mod trace_context;
pub mod traces;
mod otlp;
mod targets;
use crate::metrics::MetricsClient;
use crate::metrics::MetricsConfig;
use crate::metrics::MetricsError;
use crate::metrics::Result as MetricsResult;
pub use crate::metrics::timer::Timer;
use crate::metrics::validation::validate_tag_key;
use crate::metrics::validation::validate_tag_value;
use crate::otel_provider::OtelProvider;
use codex_protocol::ThreadId;
pub use codex_utils_string::sanitize_metric_tag_value;
use opentelemetry_sdk::metrics::data::ResourceMetrics;
use serde::Serialize;
use std::time::Duration;
use strum_macros::Display;
use tracing::debug;
pub use crate::events::otel_manager::OtelEventMetadata;
pub use crate::events::otel_manager::OtelManager;
pub use crate::metrics::runtime_metrics::RuntimeMetricTotals;
pub use crate::metrics::runtime_metrics::RuntimeMetricsSummary;
pub use crate::otel_provider::traceparent_context_from_env;
pub use crate::metrics::timer::Timer;
pub use crate::provider::OtelProvider;
pub use crate::trace_context::context_from_w3c_trace_context;
pub use crate::trace_context::current_span_trace_id;
pub use crate::trace_context::current_span_w3c_trace_context;
pub use crate::trace_context::set_parent_from_context;
pub use crate::trace_context::set_parent_from_w3c_trace_context;
pub(crate) const OTEL_TARGET_PREFIX: &str = "codex_otel";
pub(crate) const OTEL_LOG_ONLY_TARGET: &str = "codex_otel.log_only";
pub(crate) const OTEL_TRACE_SAFE_TARGET: &str = "codex_otel.trace_safe";
pub use crate::trace_context::traceparent_context_from_env;
pub use codex_utils_string::sanitize_metric_tag_value;
#[derive(Debug, Clone, Serialize, Display)]
#[serde(rename_all = "snake_case")]
@ -49,208 +41,6 @@ pub enum TelemetryAuthMode {
Chatgpt,
}
#[derive(Debug, Clone)]
pub struct OtelEventMetadata {
pub(crate) conversation_id: ThreadId,
pub(crate) auth_mode: Option<String>,
pub(crate) account_id: Option<String>,
pub(crate) account_email: Option<String>,
pub(crate) originator: String,
pub(crate) service_name: Option<String>,
pub(crate) session_source: String,
pub(crate) model: String,
pub(crate) slug: String,
pub(crate) log_user_prompts: bool,
pub(crate) app_version: &'static str,
pub(crate) terminal_type: String,
}
#[derive(Debug, Clone)]
pub struct OtelManager {
pub(crate) metadata: OtelEventMetadata,
pub(crate) metrics: Option<MetricsClient>,
pub(crate) metrics_use_metadata_tags: bool,
}
impl OtelManager {
pub fn with_model(mut self, model: &str, slug: &str) -> Self {
self.metadata.model = model.to_owned();
self.metadata.slug = slug.to_owned();
self
}
pub fn with_metrics_service_name(mut self, service_name: &str) -> Self {
self.metadata.service_name = Some(sanitize_metric_tag_value(service_name));
self
}
pub fn with_metrics(mut self, metrics: MetricsClient) -> Self {
self.metrics = Some(metrics);
self.metrics_use_metadata_tags = true;
self
}
pub fn with_metrics_without_metadata_tags(mut self, metrics: MetricsClient) -> Self {
self.metrics = Some(metrics);
self.metrics_use_metadata_tags = false;
self
}
pub fn with_metrics_config(self, config: MetricsConfig) -> MetricsResult<Self> {
let metrics = MetricsClient::new(config)?;
Ok(self.with_metrics(metrics))
}
pub fn with_provider_metrics(self, provider: &OtelProvider) -> Self {
match provider.metrics() {
Some(metrics) => self.with_metrics(metrics.clone()),
None => self,
}
}
pub fn counter(&self, name: &str, inc: i64, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.counter(name, inc, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics counter [{name}] failed: {e}");
}
}
pub fn histogram(&self, name: &str, value: i64, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.histogram(name, value, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics histogram [{name}] failed: {e}");
}
}
pub fn record_duration(&self, name: &str, duration: Duration, tags: &[(&str, &str)]) {
let res: MetricsResult<()> = (|| {
let Some(metrics) = &self.metrics else {
return Ok(());
};
let tags = self.tags_with_metadata(tags)?;
metrics.record_duration(name, duration, &tags)
})();
if let Err(e) = res {
tracing::warn!("metrics duration [{name}] failed: {e}");
}
}
pub fn start_timer(&self, name: &str, tags: &[(&str, &str)]) -> Result<Timer, MetricsError> {
let Some(metrics) = &self.metrics else {
return Err(MetricsError::ExporterDisabled);
};
let tags = self.tags_with_metadata(tags)?;
metrics.start_timer(name, &tags)
}
pub fn shutdown_metrics(&self) -> MetricsResult<()> {
let Some(metrics) = &self.metrics else {
return Ok(());
};
metrics.shutdown()
}
pub fn snapshot_metrics(&self) -> MetricsResult<ResourceMetrics> {
let Some(metrics) = &self.metrics else {
return Err(MetricsError::ExporterDisabled);
};
metrics.snapshot()
}
/// Collect and discard a runtime metrics snapshot to reset delta accumulators.
pub fn reset_runtime_metrics(&self) {
if self.metrics.is_none() {
return;
}
if let Err(err) = self.snapshot_metrics() {
debug!("runtime metrics reset skipped: {err}");
}
}
/// Collect a runtime metrics summary if debug snapshots are available.
pub fn runtime_metrics_summary(&self) -> Option<RuntimeMetricsSummary> {
let snapshot = match self.snapshot_metrics() {
Ok(snapshot) => snapshot,
Err(_) => {
return None;
}
};
let summary = RuntimeMetricsSummary::from_snapshot(&snapshot);
if summary.is_empty() {
None
} else {
Some(summary)
}
}
fn tags_with_metadata<'a>(
&'a self,
tags: &'a [(&'a str, &'a str)],
) -> MetricsResult<Vec<(&'a str, &'a str)>> {
let mut merged = self.metadata_tag_refs()?;
merged.extend(tags.iter().copied());
Ok(merged)
}
fn metadata_tag_refs(&self) -> MetricsResult<Vec<(&str, &str)>> {
if !self.metrics_use_metadata_tags {
return Ok(Vec::new());
}
let mut tags = Vec::with_capacity(7);
Self::push_metadata_tag(&mut tags, "auth_mode", self.metadata.auth_mode.as_deref())?;
Self::push_metadata_tag(
&mut tags,
"session_source",
Some(self.metadata.session_source.as_str()),
)?;
Self::push_metadata_tag(
&mut tags,
"originator",
Some(self.metadata.originator.as_str()),
)?;
Self::push_metadata_tag(
&mut tags,
"service_name",
self.metadata.service_name.as_deref(),
)?;
Self::push_metadata_tag(&mut tags, "model", Some(self.metadata.model.as_str()))?;
Self::push_metadata_tag(&mut tags, "app.version", Some(self.metadata.app_version))?;
Ok(tags)
}
fn push_metadata_tag<'a>(
tags: &mut Vec<(&'a str, &'a str)>,
key: &'static str,
value: Option<&'a str>,
) -> MetricsResult<()> {
let Some(value) = value else {
return Ok(());
};
validate_tag_key(key)?;
validate_tag_value(value)?;
tags.push((key, value));
Ok(())
}
}
/// Start a metrics timer using the globally installed metrics client.
pub fn start_global_timer(name: &str, tags: &[(&str, &str)]) -> MetricsResult<Timer> {
let Some(metrics) = crate::metrics::global() else {

View file

@ -1,493 +1,4 @@
use crate::OTEL_TARGET_PREFIX;
use crate::OTEL_TRACE_SAFE_TARGET;
use crate::config::OtelExporter;
use crate::config::OtelHttpProtocol;
use crate::config::OtelSettings;
use crate::metrics::MetricsClient;
use crate::metrics::MetricsConfig;
use crate::trace_context::context_from_trace_headers;
use gethostname::gethostname;
use opentelemetry::Context;
use opentelemetry::KeyValue;
use opentelemetry::global;
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
use opentelemetry_otlp::LogExporter;
use opentelemetry_otlp::OTEL_EXPORTER_OTLP_LOGS_TIMEOUT;
use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TRACES_TIMEOUT;
use opentelemetry_otlp::Protocol;
use opentelemetry_otlp::SpanExporter;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_otlp::WithHttpConfig;
use opentelemetry_otlp::WithTonicConfig;
use opentelemetry_otlp::tonic_types::metadata::MetadataMap;
use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig;
use opentelemetry_sdk::Resource;
use opentelemetry_sdk::logs::SdkLoggerProvider;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use opentelemetry_sdk::trace::BatchSpanProcessor;
use opentelemetry_sdk::trace::SdkTracerProvider;
use opentelemetry_sdk::trace::Tracer;
use opentelemetry_semantic_conventions as semconv;
use std::env;
use std::error::Error;
use std::sync::OnceLock;
use tracing::debug;
use tracing::warn;
use tracing_subscriber::Layer;
use tracing_subscriber::registry::LookupSpan;
//! Compatibility shim for `codex_otel::otel_provider`.
const ENV_ATTRIBUTE: &str = "env";
const HOST_NAME_ATTRIBUTE: &str = "host.name";
const TRACEPARENT_ENV_VAR: &str = "TRACEPARENT";
const TRACESTATE_ENV_VAR: &str = "TRACESTATE";
static TRACEPARENT_CONTEXT: OnceLock<Option<Context>> = OnceLock::new();
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ResourceKind {
Logs,
Traces,
}
pub struct OtelProvider {
pub logger: Option<SdkLoggerProvider>,
pub tracer_provider: Option<SdkTracerProvider>,
pub tracer: Option<Tracer>,
pub metrics: Option<MetricsClient>,
}
impl OtelProvider {
pub fn shutdown(&self) {
if let Some(logger) = &self.logger {
let _ = logger.shutdown();
}
if let Some(tracer_provider) = &self.tracer_provider {
let _ = tracer_provider.shutdown();
}
if let Some(metrics) = &self.metrics {
let _ = metrics.shutdown();
}
}
pub fn from(settings: &OtelSettings) -> Result<Option<Self>, Box<dyn Error>> {
let log_enabled = !matches!(settings.exporter, OtelExporter::None);
let trace_enabled = !matches!(settings.trace_exporter, OtelExporter::None);
let metric_exporter = crate::config::resolve_exporter(&settings.metrics_exporter);
let metrics = if matches!(metric_exporter, OtelExporter::None) {
None
} else {
let mut config = MetricsConfig::otlp(
settings.environment.clone(),
settings.service_name.clone(),
settings.service_version.clone(),
metric_exporter,
);
if settings.runtime_metrics {
config = config.with_runtime_reader();
}
Some(MetricsClient::new(config)?)
};
if let Some(metrics) = metrics.as_ref() {
crate::metrics::install_global(metrics.clone());
}
if !log_enabled && !trace_enabled && metrics.is_none() {
debug!("No OTEL exporter enabled in settings.");
return Ok(None);
}
let log_resource = make_resource(settings, ResourceKind::Logs);
let trace_resource = make_resource(settings, ResourceKind::Traces);
let logger = log_enabled
.then(|| build_logger(&log_resource, &settings.exporter))
.transpose()?;
let tracer_provider = trace_enabled
.then(|| build_tracer_provider(&trace_resource, &settings.trace_exporter))
.transpose()?;
let tracer = tracer_provider
.as_ref()
.map(|provider| provider.tracer(settings.service_name.clone()));
if let Some(provider) = tracer_provider.clone() {
global::set_tracer_provider(provider);
global::set_text_map_propagator(TraceContextPropagator::new());
}
Ok(Some(Self {
logger,
tracer_provider,
tracer,
metrics,
}))
}
pub fn logger_layer<S>(&self) -> Option<impl Layer<S> + Send + Sync>
where
S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync,
{
self.logger.as_ref().map(|logger| {
OpenTelemetryTracingBridge::new(logger).with_filter(
tracing_subscriber::filter::filter_fn(OtelProvider::log_export_filter),
)
})
}
pub fn tracing_layer<S>(&self) -> Option<impl Layer<S> + Send + Sync>
where
S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync,
{
self.tracer.as_ref().map(|tracer| {
tracing_opentelemetry::layer()
.with_tracer(tracer.clone())
.with_filter(tracing_subscriber::filter::filter_fn(
OtelProvider::trace_export_filter,
))
})
}
pub fn codex_export_filter(meta: &tracing::Metadata<'_>) -> bool {
Self::log_export_filter(meta)
}
pub fn log_export_filter(meta: &tracing::Metadata<'_>) -> bool {
is_log_export_target(meta.target())
}
pub fn trace_export_filter(meta: &tracing::Metadata<'_>) -> bool {
meta.is_span() || is_trace_safe_target(meta.target())
}
pub fn metrics(&self) -> Option<&MetricsClient> {
self.metrics.as_ref()
}
}
impl Drop for OtelProvider {
fn drop(&mut self) {
if let Some(logger) = &self.logger {
let _ = logger.shutdown();
}
if let Some(tracer_provider) = &self.tracer_provider {
let _ = tracer_provider.shutdown();
}
if let Some(metrics) = &self.metrics {
let _ = metrics.shutdown();
}
}
}
pub fn traceparent_context_from_env() -> Option<Context> {
TRACEPARENT_CONTEXT
.get_or_init(load_traceparent_context)
.clone()
}
fn load_traceparent_context() -> Option<Context> {
let traceparent = env::var(TRACEPARENT_ENV_VAR).ok()?;
let tracestate = env::var(TRACESTATE_ENV_VAR).ok();
match context_from_trace_headers(Some(&traceparent), tracestate.as_deref()) {
Some(context) => {
debug!("TRACEPARENT detected; continuing trace from parent context");
Some(context)
}
None => {
warn!("TRACEPARENT is set but invalid; ignoring trace context");
None
}
}
}
fn make_resource(settings: &OtelSettings, kind: ResourceKind) -> Resource {
Resource::builder()
.with_service_name(settings.service_name.clone())
.with_attributes(resource_attributes(
settings,
detected_host_name().as_deref(),
kind,
))
.build()
}
fn resource_attributes(
settings: &OtelSettings,
host_name: Option<&str>,
kind: ResourceKind,
) -> Vec<KeyValue> {
let mut attributes = vec![
KeyValue::new(
semconv::attribute::SERVICE_VERSION,
settings.service_version.clone(),
),
KeyValue::new(ENV_ATTRIBUTE, settings.environment.clone()),
];
if kind == ResourceKind::Logs
&& let Some(host_name) = host_name.and_then(normalize_host_name)
{
attributes.push(KeyValue::new(HOST_NAME_ATTRIBUTE, host_name));
}
attributes
}
fn detected_host_name() -> Option<String> {
let host_name = gethostname();
normalize_host_name(host_name.to_string_lossy().as_ref())
}
fn normalize_host_name(host_name: &str) -> Option<String> {
let host_name = host_name.trim();
(!host_name.is_empty()).then(|| host_name.to_owned())
}
fn is_log_export_target(target: &str) -> bool {
target.starts_with(OTEL_TARGET_PREFIX) && !is_trace_safe_target(target)
}
fn is_trace_safe_target(target: &str) -> bool {
target.starts_with(OTEL_TRACE_SAFE_TARGET)
}
fn build_logger(
resource: &Resource,
exporter: &OtelExporter,
) -> Result<SdkLoggerProvider, Box<dyn Error>> {
let mut builder = SdkLoggerProvider::builder().with_resource(resource.clone());
match crate::config::resolve_exporter(exporter) {
OtelExporter::None => return Ok(builder.build()),
OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"),
OtelExporter::OtlpGrpc {
endpoint,
headers,
tls,
} => {
debug!("Using OTLP Grpc exporter: {endpoint}");
let header_map = crate::otlp::build_header_map(&headers);
let base_tls_config = ClientTlsConfig::new()
.with_enabled_roots()
.assume_http2(true);
let tls_config = match tls.as_ref() {
Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?,
None => base_tls_config,
};
let exporter = LogExporter::builder()
.with_tonic()
.with_endpoint(endpoint)
.with_metadata(MetadataMap::from_headers(header_map))
.with_tls_config(tls_config)
.build()?;
builder = builder.with_batch_exporter(exporter);
}
OtelExporter::OtlpHttp {
endpoint,
headers,
protocol,
tls,
} => {
debug!("Using OTLP Http exporter: {endpoint}");
let protocol = match protocol {
OtelHttpProtocol::Binary => Protocol::HttpBinary,
OtelHttpProtocol::Json => Protocol::HttpJson,
};
let mut exporter_builder = LogExporter::builder()
.with_http()
.with_endpoint(endpoint)
.with_protocol(protocol)
.with_headers(headers);
if let Some(tls) = tls.as_ref() {
let client = crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_LOGS_TIMEOUT)?;
exporter_builder = exporter_builder.with_http_client(client);
}
let exporter = exporter_builder.build()?;
builder = builder.with_batch_exporter(exporter);
}
}
Ok(builder.build())
}
fn build_tracer_provider(
resource: &Resource,
exporter: &OtelExporter,
) -> Result<SdkTracerProvider, Box<dyn Error>> {
let span_exporter = match crate::config::resolve_exporter(exporter) {
OtelExporter::None => return Ok(SdkTracerProvider::builder().build()),
OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"),
OtelExporter::OtlpGrpc {
endpoint,
headers,
tls,
} => {
debug!("Using OTLP Grpc exporter for traces: {endpoint}");
let header_map = crate::otlp::build_header_map(&headers);
let base_tls_config = ClientTlsConfig::new()
.with_enabled_roots()
.assume_http2(true);
let tls_config = match tls.as_ref() {
Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?,
None => base_tls_config,
};
SpanExporter::builder()
.with_tonic()
.with_endpoint(endpoint)
.with_metadata(MetadataMap::from_headers(header_map))
.with_tls_config(tls_config)
.build()?
}
OtelExporter::OtlpHttp {
endpoint,
headers,
protocol,
tls,
} => {
debug!("Using OTLP Http exporter for traces: {endpoint}");
let protocol = match protocol {
OtelHttpProtocol::Binary => Protocol::HttpBinary,
OtelHttpProtocol::Json => Protocol::HttpJson,
};
let mut exporter_builder = SpanExporter::builder()
.with_http()
.with_endpoint(endpoint)
.with_protocol(protocol)
.with_headers(headers);
if let Some(tls) = tls.as_ref() {
let client =
crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_TRACES_TIMEOUT)?;
exporter_builder = exporter_builder.with_http_client(client);
}
exporter_builder.build()?
}
};
let processor = BatchSpanProcessor::builder(span_exporter).build();
Ok(SdkTracerProvider::builder()
.with_resource(resource.clone())
.with_span_processor(processor)
.build())
}
#[cfg(test)]
mod tests {
use super::*;
use opentelemetry::trace::SpanId;
use opentelemetry::trace::TraceContextExt;
use opentelemetry::trace::TraceId;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
#[test]
fn parses_valid_traceparent() {
let trace_id = "00000000000000000000000000000001";
let span_id = "0000000000000002";
let context =
context_from_trace_headers(Some(&format!("00-{trace_id}-{span_id}-01")), None)
.expect("trace context");
let span = context.span();
let span_context = span.span_context();
assert_eq!(
span_context.trace_id(),
TraceId::from_hex(trace_id).unwrap()
);
assert_eq!(span_context.span_id(), SpanId::from_hex(span_id).unwrap());
assert!(span_context.is_remote());
}
#[test]
fn invalid_traceparent_returns_none() {
assert!(context_from_trace_headers(Some("not-a-traceparent"), None).is_none());
}
#[test]
fn resource_attributes_include_host_name_when_present() {
let attrs = resource_attributes(
&test_otel_settings(),
Some("opentelemetry-test"),
ResourceKind::Logs,
);
let host_name = attrs
.iter()
.find(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
.map(|kv| kv.value.as_str().to_string());
assert_eq!(host_name, Some("opentelemetry-test".to_string()));
}
#[test]
fn resource_attributes_omit_host_name_when_missing_or_empty() {
let missing = resource_attributes(&test_otel_settings(), None, ResourceKind::Logs);
let empty = resource_attributes(&test_otel_settings(), Some(" "), ResourceKind::Logs);
let trace_attrs = resource_attributes(
&test_otel_settings(),
Some("opentelemetry-test"),
ResourceKind::Traces,
);
assert!(
!missing
.iter()
.any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
);
assert!(
!empty
.iter()
.any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
);
assert!(
!trace_attrs
.iter()
.any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
);
}
#[test]
fn log_export_target_excludes_trace_safe_events() {
assert!(is_log_export_target("codex_otel.log_only"));
assert!(is_log_export_target("codex_otel.network_proxy"));
assert!(!is_log_export_target("codex_otel.trace_safe"));
assert!(!is_log_export_target("codex_otel.trace_safe.debug"));
}
#[test]
fn trace_export_target_only_includes_trace_safe_prefix() {
assert!(is_trace_safe_target("codex_otel.trace_safe"));
assert!(is_trace_safe_target("codex_otel.trace_safe.summary"));
assert!(!is_trace_safe_target("codex_otel.log_only"));
assert!(!is_trace_safe_target("codex_otel.network_proxy"));
}
fn test_otel_settings() -> OtelSettings {
OtelSettings {
environment: "test".to_string(),
service_name: "codex-test".to_string(),
service_version: "0.0.0".to_string(),
codex_home: PathBuf::from("."),
exporter: OtelExporter::None,
trace_exporter: OtelExporter::None,
metrics_exporter: OtelExporter::None,
runtime_metrics: false,
}
}
}
pub use crate::provider::*;
pub use crate::trace_context::traceparent_context_from_env;

View file

@ -0,0 +1,430 @@
use crate::config::OtelExporter;
use crate::config::OtelHttpProtocol;
use crate::config::OtelSettings;
use crate::metrics::MetricsClient;
use crate::metrics::MetricsConfig;
use crate::targets::is_log_export_target;
use crate::targets::is_trace_safe_target;
use gethostname::gethostname;
use opentelemetry::KeyValue;
use opentelemetry::global;
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_appender_tracing::layer::OpenTelemetryTracingBridge;
use opentelemetry_otlp::LogExporter;
use opentelemetry_otlp::OTEL_EXPORTER_OTLP_LOGS_TIMEOUT;
use opentelemetry_otlp::OTEL_EXPORTER_OTLP_TRACES_TIMEOUT;
use opentelemetry_otlp::Protocol;
use opentelemetry_otlp::SpanExporter;
use opentelemetry_otlp::WithExportConfig;
use opentelemetry_otlp::WithHttpConfig;
use opentelemetry_otlp::WithTonicConfig;
use opentelemetry_otlp::tonic_types::metadata::MetadataMap;
use opentelemetry_otlp::tonic_types::transport::ClientTlsConfig;
use opentelemetry_sdk::Resource;
use opentelemetry_sdk::logs::SdkLoggerProvider;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use opentelemetry_sdk::trace::BatchSpanProcessor;
use opentelemetry_sdk::trace::SdkTracerProvider;
use opentelemetry_sdk::trace::Tracer;
use opentelemetry_semantic_conventions as semconv;
use std::error::Error;
use tracing::debug;
use tracing_subscriber::Layer;
use tracing_subscriber::registry::LookupSpan;
const ENV_ATTRIBUTE: &str = "env";
const HOST_NAME_ATTRIBUTE: &str = "host.name";
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum ResourceKind {
Logs,
Traces,
}
pub struct OtelProvider {
pub logger: Option<SdkLoggerProvider>,
pub tracer_provider: Option<SdkTracerProvider>,
pub tracer: Option<Tracer>,
pub metrics: Option<MetricsClient>,
}
impl OtelProvider {
pub fn shutdown(&self) {
if let Some(logger) = &self.logger {
let _ = logger.shutdown();
}
if let Some(tracer_provider) = &self.tracer_provider {
let _ = tracer_provider.shutdown();
}
if let Some(metrics) = &self.metrics {
let _ = metrics.shutdown();
}
}
pub fn from(settings: &OtelSettings) -> Result<Option<Self>, Box<dyn Error>> {
let log_enabled = !matches!(settings.exporter, OtelExporter::None);
let trace_enabled = !matches!(settings.trace_exporter, OtelExporter::None);
let metric_exporter = crate::config::resolve_exporter(&settings.metrics_exporter);
let metrics = if matches!(metric_exporter, OtelExporter::None) {
None
} else {
let mut config = MetricsConfig::otlp(
settings.environment.clone(),
settings.service_name.clone(),
settings.service_version.clone(),
metric_exporter,
);
if settings.runtime_metrics {
config = config.with_runtime_reader();
}
Some(MetricsClient::new(config)?)
};
if let Some(metrics) = metrics.as_ref() {
crate::metrics::install_global(metrics.clone());
}
if !log_enabled && !trace_enabled && metrics.is_none() {
debug!("No OTEL exporter enabled in settings.");
return Ok(None);
}
let log_resource = make_resource(settings, ResourceKind::Logs);
let trace_resource = make_resource(settings, ResourceKind::Traces);
let logger = log_enabled
.then(|| build_logger(&log_resource, &settings.exporter))
.transpose()?;
let tracer_provider = trace_enabled
.then(|| build_tracer_provider(&trace_resource, &settings.trace_exporter))
.transpose()?;
let tracer = tracer_provider
.as_ref()
.map(|provider| provider.tracer(settings.service_name.clone()));
if let Some(provider) = tracer_provider.clone() {
global::set_tracer_provider(provider);
global::set_text_map_propagator(TraceContextPropagator::new());
}
Ok(Some(Self {
logger,
tracer_provider,
tracer,
metrics,
}))
}
pub fn logger_layer<S>(&self) -> Option<impl Layer<S> + Send + Sync>
where
S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync,
{
self.logger.as_ref().map(|logger| {
OpenTelemetryTracingBridge::new(logger).with_filter(
tracing_subscriber::filter::filter_fn(OtelProvider::log_export_filter),
)
})
}
pub fn tracing_layer<S>(&self) -> Option<impl Layer<S> + Send + Sync>
where
S: tracing::Subscriber + for<'span> LookupSpan<'span> + Send + Sync,
{
self.tracer.as_ref().map(|tracer| {
tracing_opentelemetry::layer()
.with_tracer(tracer.clone())
.with_filter(tracing_subscriber::filter::filter_fn(
OtelProvider::trace_export_filter,
))
})
}
pub fn codex_export_filter(meta: &tracing::Metadata<'_>) -> bool {
Self::log_export_filter(meta)
}
pub fn log_export_filter(meta: &tracing::Metadata<'_>) -> bool {
is_log_export_target(meta.target())
}
pub fn trace_export_filter(meta: &tracing::Metadata<'_>) -> bool {
meta.is_span() || is_trace_safe_target(meta.target())
}
pub fn metrics(&self) -> Option<&MetricsClient> {
self.metrics.as_ref()
}
}
impl Drop for OtelProvider {
fn drop(&mut self) {
if let Some(logger) = &self.logger {
let _ = logger.shutdown();
}
if let Some(tracer_provider) = &self.tracer_provider {
let _ = tracer_provider.shutdown();
}
if let Some(metrics) = &self.metrics {
let _ = metrics.shutdown();
}
}
}
fn make_resource(settings: &OtelSettings, kind: ResourceKind) -> Resource {
Resource::builder()
.with_service_name(settings.service_name.clone())
.with_attributes(resource_attributes(
settings,
detected_host_name().as_deref(),
kind,
))
.build()
}
fn resource_attributes(
settings: &OtelSettings,
host_name: Option<&str>,
kind: ResourceKind,
) -> Vec<KeyValue> {
let mut attributes = vec![
KeyValue::new(
semconv::attribute::SERVICE_VERSION,
settings.service_version.clone(),
),
KeyValue::new(ENV_ATTRIBUTE, settings.environment.clone()),
];
if kind == ResourceKind::Logs
&& let Some(host_name) = host_name.and_then(normalize_host_name)
{
attributes.push(KeyValue::new(HOST_NAME_ATTRIBUTE, host_name));
}
attributes
}
fn detected_host_name() -> Option<String> {
let host_name = gethostname();
normalize_host_name(host_name.to_string_lossy().as_ref())
}
fn normalize_host_name(host_name: &str) -> Option<String> {
let host_name = host_name.trim();
(!host_name.is_empty()).then(|| host_name.to_owned())
}
fn build_logger(
resource: &Resource,
exporter: &OtelExporter,
) -> Result<SdkLoggerProvider, Box<dyn Error>> {
let mut builder = SdkLoggerProvider::builder().with_resource(resource.clone());
match crate::config::resolve_exporter(exporter) {
OtelExporter::None => return Ok(builder.build()),
OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"),
OtelExporter::OtlpGrpc {
endpoint,
headers,
tls,
} => {
debug!("Using OTLP Grpc exporter: {endpoint}");
let header_map = crate::otlp::build_header_map(&headers);
let base_tls_config = ClientTlsConfig::new()
.with_enabled_roots()
.assume_http2(true);
let tls_config = match tls.as_ref() {
Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?,
None => base_tls_config,
};
let exporter = LogExporter::builder()
.with_tonic()
.with_endpoint(endpoint)
.with_metadata(MetadataMap::from_headers(header_map))
.with_tls_config(tls_config)
.build()?;
builder = builder.with_batch_exporter(exporter);
}
OtelExporter::OtlpHttp {
endpoint,
headers,
protocol,
tls,
} => {
debug!("Using OTLP Http exporter: {endpoint}");
let protocol = match protocol {
OtelHttpProtocol::Binary => Protocol::HttpBinary,
OtelHttpProtocol::Json => Protocol::HttpJson,
};
let mut exporter_builder = LogExporter::builder()
.with_http()
.with_endpoint(endpoint)
.with_protocol(protocol)
.with_headers(headers);
if let Some(tls) = tls.as_ref() {
let client = crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_LOGS_TIMEOUT)?;
exporter_builder = exporter_builder.with_http_client(client);
}
let exporter = exporter_builder.build()?;
builder = builder.with_batch_exporter(exporter);
}
}
Ok(builder.build())
}
fn build_tracer_provider(
resource: &Resource,
exporter: &OtelExporter,
) -> Result<SdkTracerProvider, Box<dyn Error>> {
let span_exporter = match crate::config::resolve_exporter(exporter) {
OtelExporter::None => return Ok(SdkTracerProvider::builder().build()),
OtelExporter::Statsig => unreachable!("statsig exporter should be resolved"),
OtelExporter::OtlpGrpc {
endpoint,
headers,
tls,
} => {
debug!("Using OTLP Grpc exporter for traces: {endpoint}");
let header_map = crate::otlp::build_header_map(&headers);
let base_tls_config = ClientTlsConfig::new()
.with_enabled_roots()
.assume_http2(true);
let tls_config = match tls.as_ref() {
Some(tls) => crate::otlp::build_grpc_tls_config(&endpoint, base_tls_config, tls)?,
None => base_tls_config,
};
SpanExporter::builder()
.with_tonic()
.with_endpoint(endpoint)
.with_metadata(MetadataMap::from_headers(header_map))
.with_tls_config(tls_config)
.build()?
}
OtelExporter::OtlpHttp {
endpoint,
headers,
protocol,
tls,
} => {
debug!("Using OTLP Http exporter for traces: {endpoint}");
let protocol = match protocol {
OtelHttpProtocol::Binary => Protocol::HttpBinary,
OtelHttpProtocol::Json => Protocol::HttpJson,
};
let mut exporter_builder = SpanExporter::builder()
.with_http()
.with_endpoint(endpoint)
.with_protocol(protocol)
.with_headers(headers);
if let Some(tls) = tls.as_ref() {
let client =
crate::otlp::build_http_client(tls, OTEL_EXPORTER_OTLP_TRACES_TIMEOUT)?;
exporter_builder = exporter_builder.with_http_client(client);
}
exporter_builder.build()?
}
};
let processor = BatchSpanProcessor::builder(span_exporter).build();
Ok(SdkTracerProvider::builder()
.with_resource(resource.clone())
.with_span_processor(processor)
.build())
}
#[cfg(test)]
mod tests {
use super::*;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
#[test]
fn resource_attributes_include_host_name_when_present() {
let attrs = resource_attributes(
&test_otel_settings(),
Some("opentelemetry-test"),
ResourceKind::Logs,
);
let host_name = attrs
.iter()
.find(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
.map(|kv| kv.value.as_str().to_string());
assert_eq!(host_name, Some("opentelemetry-test".to_string()));
}
#[test]
fn resource_attributes_omit_host_name_when_missing_or_empty() {
let missing = resource_attributes(&test_otel_settings(), None, ResourceKind::Logs);
let empty = resource_attributes(&test_otel_settings(), Some(" "), ResourceKind::Logs);
let trace_attrs = resource_attributes(
&test_otel_settings(),
Some("opentelemetry-test"),
ResourceKind::Traces,
);
assert!(
!missing
.iter()
.any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
);
assert!(
!empty
.iter()
.any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
);
assert!(
!trace_attrs
.iter()
.any(|kv| kv.key.as_str() == HOST_NAME_ATTRIBUTE)
);
}
#[test]
fn log_export_target_excludes_trace_safe_events() {
assert!(is_log_export_target("codex_otel.log_only"));
assert!(is_log_export_target("codex_otel.network_proxy"));
assert!(!is_log_export_target("codex_otel.trace_safe"));
assert!(!is_log_export_target("codex_otel.trace_safe.debug"));
}
#[test]
fn trace_export_target_only_includes_trace_safe_prefix() {
assert!(is_trace_safe_target("codex_otel.trace_safe"));
assert!(is_trace_safe_target("codex_otel.trace_safe.summary"));
assert!(!is_trace_safe_target("codex_otel.log_only"));
assert!(!is_trace_safe_target("codex_otel.network_proxy"));
}
fn test_otel_settings() -> OtelSettings {
OtelSettings {
environment: "test".to_string(),
service_name: "codex-test".to_string(),
service_version: "0.0.0".to_string(),
codex_home: PathBuf::from("."),
exporter: OtelExporter::None,
trace_exporter: OtelExporter::None,
metrics_exporter: OtelExporter::None,
runtime_metrics: false,
}
}
}

View file

@ -0,0 +1,11 @@
pub(crate) const OTEL_TARGET_PREFIX: &str = "codex_otel";
pub(crate) const OTEL_LOG_ONLY_TARGET: &str = "codex_otel.log_only";
pub(crate) const OTEL_TRACE_SAFE_TARGET: &str = "codex_otel.trace_safe";
pub(crate) fn is_log_export_target(target: &str) -> bool {
target.starts_with(OTEL_TARGET_PREFIX) && !is_trace_safe_target(target)
}
pub(crate) fn is_trace_safe_target(target: &str) -> bool {
target.starts_with(OTEL_TRACE_SAFE_TARGET)
}

View file

@ -1,4 +1,6 @@
use std::collections::HashMap;
use std::env;
use std::sync::OnceLock;
use codex_protocol::protocol::W3cTraceContext;
use opentelemetry::Context;
@ -6,8 +8,14 @@ use opentelemetry::propagation::TextMapPropagator;
use opentelemetry::trace::TraceContextExt;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use tracing::Span;
use tracing::debug;
use tracing::warn;
use tracing_opentelemetry::OpenTelemetrySpanExt;
const TRACEPARENT_ENV_VAR: &str = "TRACEPARENT";
const TRACESTATE_ENV_VAR: &str = "TRACESTATE";
static TRACEPARENT_CONTEXT: OnceLock<Option<Context>> = OnceLock::new();
pub fn current_span_w3c_trace_context() -> Option<W3cTraceContext> {
let context = Span::current().context();
if !context.span().span_context().is_valid() {
@ -51,6 +59,12 @@ pub fn set_parent_from_context(span: &Span, context: Context) {
let _ = span.set_parent(context);
}
pub fn traceparent_context_from_env() -> Option<Context> {
TRACEPARENT_CONTEXT
.get_or_init(load_traceparent_context)
.clone()
}
pub(crate) fn context_from_trace_headers(
traceparent: Option<&str>,
tracestate: Option<&str>,
@ -69,6 +83,22 @@ pub(crate) fn context_from_trace_headers(
Some(context)
}
fn load_traceparent_context() -> Option<Context> {
let traceparent = env::var(TRACEPARENT_ENV_VAR).ok()?;
let tracestate = env::var(TRACESTATE_ENV_VAR).ok();
match context_from_trace_headers(Some(&traceparent), tracestate.as_deref()) {
Some(context) => {
debug!("TRACEPARENT detected; continuing trace from parent context");
Some(context)
}
None => {
warn!("TRACEPARENT is set but invalid; ignoring trace context");
None
}
}
}
#[cfg(test)]
mod tests {
use super::context_from_trace_headers;

View file

@ -1 +0,0 @@
pub mod otel_manager;