From 4ee039744ef5e1aa16657cb8be06cdc4dc6cdcb1 Mon Sep 17 00:00:00 2001 From: Anton Panasenko Date: Thu, 5 Feb 2026 18:22:30 -0800 Subject: [PATCH] feat: expose detailed metrics to runtime metrics (#10699) --- .../core/tests/suite/client_websockets.rs | 10 ++- codex-rs/otel/src/metrics/names.rs | 8 +++ codex-rs/otel/src/metrics/runtime_metrics.rs | 67 +++++++++++++++++ codex-rs/otel/src/traces/otel_manager.rs | 44 ++++++++++++ codex-rs/otel/tests/suite/runtime_summary.rs | 6 +- codex-rs/tui/src/chatwidget.rs | 71 +++++++++++++++++-- codex-rs/tui/src/chatwidget/tests.rs | 46 ++++++++++++ codex-rs/tui/src/history_cell.rs | 38 +++++++++- 8 files changed, 279 insertions(+), 11 deletions(-) diff --git a/codex-rs/core/tests/suite/client_websockets.rs b/codex-rs/core/tests/suite/client_websockets.rs index 76a1f0b3c..bf170bb75 100644 --- a/codex-rs/core/tests/suite/client_websockets.rs +++ b/codex-rs/core/tests/suite/client_websockets.rs @@ -127,7 +127,11 @@ async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics "type": "responsesapi.websocket_timing", "timing_metrics": { "responses_duration_excl_engine_and_client_tool_time_ms": 120, - "engine_service_total_ms": 450 + "engine_service_total_ms": 450, + "engine_iapi_ttft_total_ms": 310, + "engine_service_ttft_total_ms": 340, + "engine_iapi_tbt_across_engine_calls_ms": 220, + "engine_service_tbt_across_engine_calls_ms": 260 } }), ev_completed("resp-1"), @@ -154,6 +158,10 @@ async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics .expect("runtime metrics summary"); assert_eq!(summary.responses_api_overhead_ms, 120); assert_eq!(summary.responses_api_inference_time_ms, 450); + assert_eq!(summary.responses_api_engine_iapi_ttft_ms, 310); + assert_eq!(summary.responses_api_engine_service_ttft_ms, 340); + assert_eq!(summary.responses_api_engine_iapi_tbt_ms, 220); + assert_eq!(summary.responses_api_engine_service_tbt_ms, 260); server.shutdown().await; } diff --git a/codex-rs/otel/src/metrics/names.rs b/codex-rs/otel/src/metrics/names.rs index 97c4f4e6e..76b46d245 100644 --- a/codex-rs/otel/src/metrics/names.rs +++ b/codex-rs/otel/src/metrics/names.rs @@ -12,3 +12,11 @@ pub(crate) const RESPONSES_API_OVERHEAD_DURATION_METRIC: &str = "codex.responses_api_overhead.duration_ms"; pub(crate) const RESPONSES_API_INFERENCE_TIME_DURATION_METRIC: &str = "codex.responses_api_inference_time.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC: &str = + "codex.responses_api_engine_iapi_ttft.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC: &str = + "codex.responses_api_engine_service_ttft.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC: &str = + "codex.responses_api_engine_iapi_tbt.duration_ms"; +pub(crate) const RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC: &str = + "codex.responses_api_engine_service_tbt.duration_ms"; diff --git a/codex-rs/otel/src/metrics/runtime_metrics.rs b/codex-rs/otel/src/metrics/runtime_metrics.rs index d59ff4b1b..dcac367d8 100644 --- a/codex-rs/otel/src/metrics/runtime_metrics.rs +++ b/codex-rs/otel/src/metrics/runtime_metrics.rs @@ -1,5 +1,9 @@ use crate::metrics::names::API_CALL_COUNT_METRIC; use crate::metrics::names::API_CALL_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC; use crate::metrics::names::SSE_EVENT_COUNT_METRIC; @@ -25,6 +29,11 @@ impl RuntimeMetricTotals { pub fn is_empty(self) -> bool { self.count == 0 && self.duration_ms == 0 } + + pub fn merge(&mut self, other: Self) { + self.count = self.count.saturating_add(other.count); + self.duration_ms = self.duration_ms.saturating_add(other.duration_ms); + } } #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] @@ -36,6 +45,10 @@ pub struct RuntimeMetricsSummary { pub websocket_events: RuntimeMetricTotals, pub responses_api_overhead_ms: u64, pub responses_api_inference_time_ms: u64, + pub responses_api_engine_iapi_ttft_ms: u64, + pub responses_api_engine_service_ttft_ms: u64, + pub responses_api_engine_iapi_tbt_ms: u64, + pub responses_api_engine_service_tbt_ms: u64, } impl RuntimeMetricsSummary { @@ -47,6 +60,48 @@ impl RuntimeMetricsSummary { && self.websocket_events.is_empty() && self.responses_api_overhead_ms == 0 && self.responses_api_inference_time_ms == 0 + && self.responses_api_engine_iapi_ttft_ms == 0 + && self.responses_api_engine_service_ttft_ms == 0 + && self.responses_api_engine_iapi_tbt_ms == 0 + && self.responses_api_engine_service_tbt_ms == 0 + } + + pub fn merge(&mut self, other: Self) { + self.tool_calls.merge(other.tool_calls); + self.api_calls.merge(other.api_calls); + self.streaming_events.merge(other.streaming_events); + self.websocket_calls.merge(other.websocket_calls); + self.websocket_events.merge(other.websocket_events); + if other.responses_api_overhead_ms > 0 { + self.responses_api_overhead_ms = other.responses_api_overhead_ms; + } + if other.responses_api_inference_time_ms > 0 { + self.responses_api_inference_time_ms = other.responses_api_inference_time_ms; + } + if other.responses_api_engine_iapi_ttft_ms > 0 { + self.responses_api_engine_iapi_ttft_ms = other.responses_api_engine_iapi_ttft_ms; + } + if other.responses_api_engine_service_ttft_ms > 0 { + self.responses_api_engine_service_ttft_ms = other.responses_api_engine_service_ttft_ms; + } + if other.responses_api_engine_iapi_tbt_ms > 0 { + self.responses_api_engine_iapi_tbt_ms = other.responses_api_engine_iapi_tbt_ms; + } + if other.responses_api_engine_service_tbt_ms > 0 { + self.responses_api_engine_service_tbt_ms = other.responses_api_engine_service_tbt_ms; + } + } + + pub fn responses_api_summary(&self) -> RuntimeMetricsSummary { + Self { + responses_api_overhead_ms: self.responses_api_overhead_ms, + responses_api_inference_time_ms: self.responses_api_inference_time_ms, + responses_api_engine_iapi_ttft_ms: self.responses_api_engine_iapi_ttft_ms, + responses_api_engine_service_ttft_ms: self.responses_api_engine_service_ttft_ms, + responses_api_engine_iapi_tbt_ms: self.responses_api_engine_iapi_tbt_ms, + responses_api_engine_service_tbt_ms: self.responses_api_engine_service_tbt_ms, + ..RuntimeMetricsSummary::default() + } } pub(crate) fn from_snapshot(snapshot: &ResourceMetrics) -> Self { @@ -74,6 +129,14 @@ impl RuntimeMetricsSummary { sum_histogram_ms(snapshot, RESPONSES_API_OVERHEAD_DURATION_METRIC); let responses_api_inference_time_ms = sum_histogram_ms(snapshot, RESPONSES_API_INFERENCE_TIME_DURATION_METRIC); + let responses_api_engine_iapi_ttft_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC); + let responses_api_engine_service_ttft_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC); + let responses_api_engine_iapi_tbt_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC); + let responses_api_engine_service_tbt_ms = + sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC); Self { tool_calls, api_calls, @@ -82,6 +145,10 @@ impl RuntimeMetricsSummary { websocket_events, responses_api_overhead_ms, responses_api_inference_time_ms, + responses_api_engine_iapi_ttft_ms, + responses_api_engine_service_ttft_ms, + responses_api_engine_iapi_tbt_ms, + responses_api_engine_service_tbt_ms, } } } diff --git a/codex-rs/otel/src/traces/otel_manager.rs b/codex-rs/otel/src/traces/otel_manager.rs index aa3006da0..c54c64c6c 100644 --- a/codex-rs/otel/src/traces/otel_manager.rs +++ b/codex-rs/otel/src/traces/otel_manager.rs @@ -1,6 +1,10 @@ use crate::TelemetryAuthMode; use crate::metrics::names::API_CALL_COUNT_METRIC; use crate::metrics::names::API_CALL_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC; +use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC; use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC; use crate::metrics::names::SSE_EVENT_COUNT_METRIC; @@ -48,6 +52,10 @@ const RESPONSES_WEBSOCKET_TIMING_KIND: &str = "responsesapi.websocket_timing"; const RESPONSES_WEBSOCKET_TIMING_METRICS_FIELD: &str = "timing_metrics"; const RESPONSES_API_OVERHEAD_FIELD: &str = "responses_duration_excl_engine_and_client_tool_time_ms"; const RESPONSES_API_INFERENCE_FIELD: &str = "engine_service_total_ms"; +const RESPONSES_API_ENGINE_IAPI_TTFT_FIELD: &str = "engine_iapi_ttft_total_ms"; +const RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD: &str = "engine_service_ttft_total_ms"; +const RESPONSES_API_ENGINE_IAPI_TBT_FIELD: &str = "engine_iapi_tbt_across_engine_calls_ms"; +const RESPONSES_API_ENGINE_SERVICE_TBT_FIELD: &str = "engine_service_tbt_across_engine_calls_ms"; impl OtelManager { #[allow(clippy::too_many_arguments)] @@ -674,6 +682,42 @@ impl OtelManager { if let Some(duration) = duration_from_ms_value(inference_value) { self.record_duration(RESPONSES_API_INFERENCE_TIME_DURATION_METRIC, duration, &[]); } + + let engine_iapi_ttft_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TTFT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_iapi_ttft_value) { + self.record_duration( + RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC, + duration, + &[], + ); + } + + let engine_service_ttft_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_service_ttft_value) { + self.record_duration( + RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC, + duration, + &[], + ); + } + + let engine_iapi_tbt_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TBT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_iapi_tbt_value) { + self.record_duration(RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC, duration, &[]); + } + + let engine_service_tbt_value = + timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TBT_FIELD)); + if let Some(duration) = duration_from_ms_value(engine_service_tbt_value) { + self.record_duration( + RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC, + duration, + &[], + ); + } } fn responses_type(event: &ResponseEvent) -> String { diff --git a/codex-rs/otel/tests/suite/runtime_summary.rs b/codex-rs/otel/tests/suite/runtime_summary.rs index 6ef2927f4..79c6b258e 100644 --- a/codex-rs/otel/tests/suite/runtime_summary.rs +++ b/codex-rs/otel/tests/suite/runtime_summary.rs @@ -67,7 +67,7 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<( Option>, codex_api::ApiError, > = Ok(Some(Ok(Message::Text( - r#"{"type":"responsesapi.websocket_timing","timing_metrics":{"responses_duration_excl_engine_and_client_tool_time_ms":124,"engine_service_total_ms":457}}"# + r#"{"type":"responsesapi.websocket_timing","timing_metrics":{"responses_duration_excl_engine_and_client_tool_time_ms":124,"engine_service_total_ms":457,"engine_iapi_ttft_total_ms":211,"engine_service_ttft_total_ms":233,"engine_iapi_tbt_across_engine_calls_ms":377,"engine_service_tbt_across_engine_calls_ms":399}}"# .into(), )))); manager.record_websocket_event(&ws_timing_response, Duration::from_millis(20)); @@ -98,6 +98,10 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<( }, responses_api_overhead_ms: 124, responses_api_inference_time_ms: 457, + responses_api_engine_iapi_ttft_ms: 211, + responses_api_engine_service_ttft_ms: 233, + responses_api_engine_iapi_tbt_ms: 377, + responses_api_engine_service_tbt_ms: 399, }; assert_eq!(summary, expected); diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 93d21610d..39e757fa7 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -105,6 +105,7 @@ use codex_core::skills::model::SkillMetadata; #[cfg(target_os = "windows")] use codex_core::windows_sandbox::WindowsSandboxLevelExt; use codex_otel::OtelManager; +use codex_otel::RuntimeMetricsSummary; use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::approvals::ElicitationRequestEvent; @@ -587,7 +588,8 @@ pub(crate) struct ChatWidget { // This lets the separator show per-chunk work time (since the previous separator) rather than // the total task-running time reported by the status indicator. last_separator_elapsed_secs: Option, - + // Runtime metrics accumulated across delta snapshots for the active turn. + turn_runtime_metrics: RuntimeMetricsSummary, last_rendered_width: std::cell::Cell>, // Feedback sink for /feedback feedback: codex_feedback::CodexFeedback, @@ -931,6 +933,32 @@ impl ChatWidget { self.request_status_line_branch(cwd); } + fn collect_runtime_metrics_delta(&mut self) { + if let Some(delta) = self.otel_manager.runtime_metrics_summary() { + self.apply_runtime_metrics_delta(delta); + } + } + + fn apply_runtime_metrics_delta(&mut self, delta: RuntimeMetricsSummary) { + let should_log_timing = has_websocket_timing_metrics(delta); + self.turn_runtime_metrics.merge(delta); + if should_log_timing { + self.log_websocket_timing_totals(delta); + } + } + + fn log_websocket_timing_totals(&mut self, delta: RuntimeMetricsSummary) { + if let Some(label) = history_cell::runtime_metrics_label(delta.responses_api_summary()) { + self.add_plain_history_lines(vec![ + vec!["• ".dim(), format!("WebSocket timing: {label}").dark_gray()].into(), + ]); + } + } + + fn refresh_runtime_metrics(&mut self) { + self.collect_runtime_metrics_delta(); + } + fn restore_retry_status_header_if_present(&mut self) { if let Some(header) = self.retry_status_header.take() { self.set_status_header(header); @@ -1218,6 +1246,7 @@ impl ChatWidget { self.plan_item_active = false; self.adaptive_chunking.reset(); self.plan_stream_controller = None; + self.turn_runtime_metrics = RuntimeMetricsSummary::default(); self.otel_manager.reset_runtime_metrics(); self.bottom_pane.clear_quit_shortcut_hint(); self.quit_shortcut_expires_at = None; @@ -1241,17 +1270,25 @@ impl ChatWidget { } self.flush_unified_exec_wait_streak(); if !from_replay { - let runtime_metrics = self.otel_manager.runtime_metrics_summary(); - if runtime_metrics.is_some() { - let elapsed_seconds = self - .bottom_pane - .status_widget() - .map(super::status_indicator_widget::StatusIndicatorWidget::elapsed_seconds); + self.collect_runtime_metrics_delta(); + let runtime_metrics = + (!self.turn_runtime_metrics.is_empty()).then_some(self.turn_runtime_metrics); + let show_work_separator = self.needs_final_message_separator && self.had_work_activity; + if show_work_separator || runtime_metrics.is_some() { + let elapsed_seconds = if show_work_separator { + self.bottom_pane + .status_widget() + .map(super::status_indicator_widget::StatusIndicatorWidget::elapsed_seconds) + .map(|current| self.worked_elapsed_from(current)) + } else { + None + }; self.add_to_history(history_cell::FinalMessageSeparator::new( elapsed_seconds, runtime_metrics, )); } + self.turn_runtime_metrics = RuntimeMetricsSummary::default(); self.needs_final_message_separator = false; self.had_work_activity = false; self.request_status_line_branch_refresh(); @@ -2087,6 +2124,10 @@ impl ChatWidget { } self.app_event_tx.send(AppEvent::StopCommitAnimation); } + + if self.agent_turn_running { + self.refresh_runtime_metrics(); + } } fn flush_interrupt_queue(&mut self) { @@ -2529,6 +2570,7 @@ impl ChatWidget { plan_delta_buffer: String::new(), plan_item_active: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback, feedback_audience, @@ -2690,6 +2732,7 @@ impl ChatWidget { needs_final_message_separator: false, had_work_activity: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback, feedback_audience, @@ -2840,6 +2883,7 @@ impl ChatWidget { plan_delta_buffer: String::new(), plan_item_active: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback, feedback_audience, @@ -3859,6 +3903,10 @@ impl ChatWidget { } } } + + if !from_replay && self.agent_turn_running { + self.refresh_runtime_metrics(); + } } fn on_entered_review_mode(&mut self, review: ReviewRequest, from_replay: bool) { @@ -6690,6 +6738,15 @@ impl ChatWidget { } } +fn has_websocket_timing_metrics(summary: RuntimeMetricsSummary) -> bool { + summary.responses_api_overhead_ms > 0 + || summary.responses_api_inference_time_ms > 0 + || summary.responses_api_engine_iapi_ttft_ms > 0 + || summary.responses_api_engine_service_ttft_ms > 0 + || summary.responses_api_engine_iapi_tbt_ms > 0 + || summary.responses_api_engine_service_tbt_ms > 0 +} + impl Drop for ChatWidget { fn drop(&mut self) { self.stop_rate_limit_poller(); diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index f49493b6a..1b70abf97 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -62,6 +62,7 @@ use codex_core::protocol::UndoStartedEvent; use codex_core::protocol::ViewImageToolCallEvent; use codex_core::protocol::WarningEvent; use codex_otel::OtelManager; +use codex_otel::RuntimeMetricsSummary; use codex_protocol::ThreadId; use codex_protocol::account::PlanType; use codex_protocol::config_types::CollaborationMode; @@ -955,6 +956,7 @@ async fn make_chatwidget_manual( plan_delta_buffer: String::new(), plan_item_active: false, last_separator_elapsed_secs: None, + turn_runtime_metrics: RuntimeMetricsSummary::default(), last_rendered_width: std::cell::Cell::new(None), feedback: codex_feedback::CodexFeedback::new(), feedback_audience: FeedbackAudience::External, @@ -5029,6 +5031,50 @@ async fn stream_recovery_restores_previous_status_header() { assert!(chat.retry_status_header.is_none()); } +#[tokio::test] +async fn runtime_metrics_websocket_timing_logs_and_final_separator_sums_totals() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + chat.set_feature_enabled(Feature::RuntimeMetrics, true); + + chat.on_task_started(); + chat.apply_runtime_metrics_delta(RuntimeMetricsSummary { + responses_api_engine_iapi_ttft_ms: 120, + responses_api_engine_service_tbt_ms: 50, + ..RuntimeMetricsSummary::default() + }); + + let first_log = drain_insert_history(&mut rx) + .iter() + .map(|lines| lines_to_single_string(lines)) + .find(|line| line.contains("WebSocket timing:")) + .expect("expected websocket timing log"); + assert!(first_log.contains("TTFT: 120ms (iapi)")); + assert!(first_log.contains("TBT: 50ms (service)")); + + chat.apply_runtime_metrics_delta(RuntimeMetricsSummary { + responses_api_engine_iapi_ttft_ms: 80, + ..RuntimeMetricsSummary::default() + }); + + let second_log = drain_insert_history(&mut rx) + .iter() + .map(|lines| lines_to_single_string(lines)) + .find(|line| line.contains("WebSocket timing:")) + .expect("expected websocket timing log"); + assert!(second_log.contains("TTFT: 80ms (iapi)")); + + chat.on_task_complete(None, false); + let mut final_separator = None; + while let Ok(event) = rx.try_recv() { + if let AppEvent::InsertHistoryCell(cell) = event { + final_separator = Some(lines_to_single_string(&cell.display_lines(300))); + } + } + let final_separator = final_separator.expect("expected final separator with runtime metrics"); + assert!(final_separator.contains("TTFT: 80ms (iapi)")); + assert!(final_separator.contains("TBT: 50ms (service)")); +} + #[tokio::test] async fn multiple_agent_messages_in_single_turn_emit_multiple_headers() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; diff --git a/codex-rs/tui/src/history_cell.rs b/codex-rs/tui/src/history_cell.rs index b85ca8f6b..26a91dc6e 100644 --- a/codex-rs/tui/src/history_cell.rs +++ b/codex-rs/tui/src/history_cell.rs @@ -2172,7 +2172,7 @@ impl HistoryCell for FinalMessageSeparator { } } -fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option { +pub(crate) fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option { let mut parts = Vec::new(); if summary.tool_calls.count > 0 { let duration = format_duration_ms(summary.tool_calls.duration_ms); @@ -2221,6 +2221,34 @@ fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option { let duration = format_duration_ms(summary.responses_api_inference_time_ms); parts.push(format!("Responses API inference: {duration}")); } + if summary.responses_api_engine_iapi_ttft_ms > 0 + || summary.responses_api_engine_service_ttft_ms > 0 + { + let mut ttft_parts = Vec::new(); + if summary.responses_api_engine_iapi_ttft_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_iapi_ttft_ms); + ttft_parts.push(format!("{duration} (iapi)")); + } + if summary.responses_api_engine_service_ttft_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_service_ttft_ms); + ttft_parts.push(format!("{duration} (service)")); + } + parts.push(format!("TTFT: {}", ttft_parts.join(" "))); + } + if summary.responses_api_engine_iapi_tbt_ms > 0 + || summary.responses_api_engine_service_tbt_ms > 0 + { + let mut tbt_parts = Vec::new(); + if summary.responses_api_engine_iapi_tbt_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_iapi_tbt_ms); + tbt_parts.push(format!("{duration} (iapi)")); + } + if summary.responses_api_engine_service_tbt_ms > 0 { + let duration = format_duration_ms(summary.responses_api_engine_service_tbt_ms); + tbt_parts.push(format!("{duration} (service)")); + } + parts.push(format!("TBT: {}", tbt_parts.join(" "))); + } if parts.is_empty() { None } else { @@ -2391,9 +2419,13 @@ mod tests { }, responses_api_overhead_ms: 650, responses_api_inference_time_ms: 1_940, + responses_api_engine_iapi_ttft_ms: 410, + responses_api_engine_service_ttft_ms: 460, + responses_api_engine_iapi_tbt_ms: 1_180, + responses_api_engine_service_tbt_ms: 1_240, }; let cell = FinalMessageSeparator::new(Some(12), Some(summary)); - let rendered = render_lines(&cell.display_lines(300)); + let rendered = render_lines(&cell.display_lines(600)); assert_eq!(rendered.len(), 1); assert!(!rendered[0].contains("Worked for")); @@ -2404,6 +2436,8 @@ mod tests { assert!(rendered[0].contains("4 events received (1.2s)")); assert!(rendered[0].contains("Responses API overhead: 650ms")); assert!(rendered[0].contains("Responses API inference: 1.9s")); + assert!(rendered[0].contains("TTFT: 410ms (iapi) 460ms (service)")); + assert!(rendered[0].contains("TBT: 1.2s (iapi) 1.2s (service)")); } #[test]