feat: expose detailed metrics to runtime metrics (#10699)

This commit is contained in:
Anton Panasenko 2026-02-05 18:22:30 -08:00 committed by GitHub
parent d74fa8edd1
commit 4ee039744e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 279 additions and 11 deletions

View file

@ -127,7 +127,11 @@ async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics
"type": "responsesapi.websocket_timing",
"timing_metrics": {
"responses_duration_excl_engine_and_client_tool_time_ms": 120,
"engine_service_total_ms": 450
"engine_service_total_ms": 450,
"engine_iapi_ttft_total_ms": 310,
"engine_service_ttft_total_ms": 340,
"engine_iapi_tbt_across_engine_calls_ms": 220,
"engine_service_tbt_across_engine_calls_ms": 260
}
}),
ev_completed("resp-1"),
@ -154,6 +158,10 @@ async fn responses_websocket_includes_timing_metrics_header_when_runtime_metrics
.expect("runtime metrics summary");
assert_eq!(summary.responses_api_overhead_ms, 120);
assert_eq!(summary.responses_api_inference_time_ms, 450);
assert_eq!(summary.responses_api_engine_iapi_ttft_ms, 310);
assert_eq!(summary.responses_api_engine_service_ttft_ms, 340);
assert_eq!(summary.responses_api_engine_iapi_tbt_ms, 220);
assert_eq!(summary.responses_api_engine_service_tbt_ms, 260);
server.shutdown().await;
}

View file

@ -12,3 +12,11 @@ pub(crate) const RESPONSES_API_OVERHEAD_DURATION_METRIC: &str =
"codex.responses_api_overhead.duration_ms";
pub(crate) const RESPONSES_API_INFERENCE_TIME_DURATION_METRIC: &str =
"codex.responses_api_inference_time.duration_ms";
pub(crate) const RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC: &str =
"codex.responses_api_engine_iapi_ttft.duration_ms";
pub(crate) const RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC: &str =
"codex.responses_api_engine_service_ttft.duration_ms";
pub(crate) const RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC: &str =
"codex.responses_api_engine_iapi_tbt.duration_ms";
pub(crate) const RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC: &str =
"codex.responses_api_engine_service_tbt.duration_ms";

View file

@ -1,5 +1,9 @@
use crate::metrics::names::API_CALL_COUNT_METRIC;
use crate::metrics::names::API_CALL_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC;
use crate::metrics::names::SSE_EVENT_COUNT_METRIC;
@ -25,6 +29,11 @@ impl RuntimeMetricTotals {
pub fn is_empty(self) -> bool {
self.count == 0 && self.duration_ms == 0
}
pub fn merge(&mut self, other: Self) {
self.count = self.count.saturating_add(other.count);
self.duration_ms = self.duration_ms.saturating_add(other.duration_ms);
}
}
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
@ -36,6 +45,10 @@ pub struct RuntimeMetricsSummary {
pub websocket_events: RuntimeMetricTotals,
pub responses_api_overhead_ms: u64,
pub responses_api_inference_time_ms: u64,
pub responses_api_engine_iapi_ttft_ms: u64,
pub responses_api_engine_service_ttft_ms: u64,
pub responses_api_engine_iapi_tbt_ms: u64,
pub responses_api_engine_service_tbt_ms: u64,
}
impl RuntimeMetricsSummary {
@ -47,6 +60,48 @@ impl RuntimeMetricsSummary {
&& self.websocket_events.is_empty()
&& self.responses_api_overhead_ms == 0
&& self.responses_api_inference_time_ms == 0
&& self.responses_api_engine_iapi_ttft_ms == 0
&& self.responses_api_engine_service_ttft_ms == 0
&& self.responses_api_engine_iapi_tbt_ms == 0
&& self.responses_api_engine_service_tbt_ms == 0
}
pub fn merge(&mut self, other: Self) {
self.tool_calls.merge(other.tool_calls);
self.api_calls.merge(other.api_calls);
self.streaming_events.merge(other.streaming_events);
self.websocket_calls.merge(other.websocket_calls);
self.websocket_events.merge(other.websocket_events);
if other.responses_api_overhead_ms > 0 {
self.responses_api_overhead_ms = other.responses_api_overhead_ms;
}
if other.responses_api_inference_time_ms > 0 {
self.responses_api_inference_time_ms = other.responses_api_inference_time_ms;
}
if other.responses_api_engine_iapi_ttft_ms > 0 {
self.responses_api_engine_iapi_ttft_ms = other.responses_api_engine_iapi_ttft_ms;
}
if other.responses_api_engine_service_ttft_ms > 0 {
self.responses_api_engine_service_ttft_ms = other.responses_api_engine_service_ttft_ms;
}
if other.responses_api_engine_iapi_tbt_ms > 0 {
self.responses_api_engine_iapi_tbt_ms = other.responses_api_engine_iapi_tbt_ms;
}
if other.responses_api_engine_service_tbt_ms > 0 {
self.responses_api_engine_service_tbt_ms = other.responses_api_engine_service_tbt_ms;
}
}
pub fn responses_api_summary(&self) -> RuntimeMetricsSummary {
Self {
responses_api_overhead_ms: self.responses_api_overhead_ms,
responses_api_inference_time_ms: self.responses_api_inference_time_ms,
responses_api_engine_iapi_ttft_ms: self.responses_api_engine_iapi_ttft_ms,
responses_api_engine_service_ttft_ms: self.responses_api_engine_service_ttft_ms,
responses_api_engine_iapi_tbt_ms: self.responses_api_engine_iapi_tbt_ms,
responses_api_engine_service_tbt_ms: self.responses_api_engine_service_tbt_ms,
..RuntimeMetricsSummary::default()
}
}
pub(crate) fn from_snapshot(snapshot: &ResourceMetrics) -> Self {
@ -74,6 +129,14 @@ impl RuntimeMetricsSummary {
sum_histogram_ms(snapshot, RESPONSES_API_OVERHEAD_DURATION_METRIC);
let responses_api_inference_time_ms =
sum_histogram_ms(snapshot, RESPONSES_API_INFERENCE_TIME_DURATION_METRIC);
let responses_api_engine_iapi_ttft_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC);
let responses_api_engine_service_ttft_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC);
let responses_api_engine_iapi_tbt_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC);
let responses_api_engine_service_tbt_ms =
sum_histogram_ms(snapshot, RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC);
Self {
tool_calls,
api_calls,
@ -82,6 +145,10 @@ impl RuntimeMetricsSummary {
websocket_events,
responses_api_overhead_ms,
responses_api_inference_time_ms,
responses_api_engine_iapi_ttft_ms,
responses_api_engine_service_ttft_ms,
responses_api_engine_iapi_tbt_ms,
responses_api_engine_service_tbt_ms,
}
}
}

View file

@ -1,6 +1,10 @@
use crate::TelemetryAuthMode;
use crate::metrics::names::API_CALL_COUNT_METRIC;
use crate::metrics::names::API_CALL_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_INFERENCE_TIME_DURATION_METRIC;
use crate::metrics::names::RESPONSES_API_OVERHEAD_DURATION_METRIC;
use crate::metrics::names::SSE_EVENT_COUNT_METRIC;
@ -48,6 +52,10 @@ const RESPONSES_WEBSOCKET_TIMING_KIND: &str = "responsesapi.websocket_timing";
const RESPONSES_WEBSOCKET_TIMING_METRICS_FIELD: &str = "timing_metrics";
const RESPONSES_API_OVERHEAD_FIELD: &str = "responses_duration_excl_engine_and_client_tool_time_ms";
const RESPONSES_API_INFERENCE_FIELD: &str = "engine_service_total_ms";
const RESPONSES_API_ENGINE_IAPI_TTFT_FIELD: &str = "engine_iapi_ttft_total_ms";
const RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD: &str = "engine_service_ttft_total_ms";
const RESPONSES_API_ENGINE_IAPI_TBT_FIELD: &str = "engine_iapi_tbt_across_engine_calls_ms";
const RESPONSES_API_ENGINE_SERVICE_TBT_FIELD: &str = "engine_service_tbt_across_engine_calls_ms";
impl OtelManager {
#[allow(clippy::too_many_arguments)]
@ -674,6 +682,42 @@ impl OtelManager {
if let Some(duration) = duration_from_ms_value(inference_value) {
self.record_duration(RESPONSES_API_INFERENCE_TIME_DURATION_METRIC, duration, &[]);
}
let engine_iapi_ttft_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TTFT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_iapi_ttft_value) {
self.record_duration(
RESPONSES_API_ENGINE_IAPI_TTFT_DURATION_METRIC,
duration,
&[],
);
}
let engine_service_ttft_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TTFT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_service_ttft_value) {
self.record_duration(
RESPONSES_API_ENGINE_SERVICE_TTFT_DURATION_METRIC,
duration,
&[],
);
}
let engine_iapi_tbt_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_IAPI_TBT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_iapi_tbt_value) {
self.record_duration(RESPONSES_API_ENGINE_IAPI_TBT_DURATION_METRIC, duration, &[]);
}
let engine_service_tbt_value =
timing_metrics.and_then(|value| value.get(RESPONSES_API_ENGINE_SERVICE_TBT_FIELD));
if let Some(duration) = duration_from_ms_value(engine_service_tbt_value) {
self.record_duration(
RESPONSES_API_ENGINE_SERVICE_TBT_DURATION_METRIC,
duration,
&[],
);
}
}
fn responses_type(event: &ResponseEvent) -> String {

View file

@ -67,7 +67,7 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<(
Option<std::result::Result<Message, tokio_tungstenite::tungstenite::Error>>,
codex_api::ApiError,
> = Ok(Some(Ok(Message::Text(
r#"{"type":"responsesapi.websocket_timing","timing_metrics":{"responses_duration_excl_engine_and_client_tool_time_ms":124,"engine_service_total_ms":457}}"#
r#"{"type":"responsesapi.websocket_timing","timing_metrics":{"responses_duration_excl_engine_and_client_tool_time_ms":124,"engine_service_total_ms":457,"engine_iapi_ttft_total_ms":211,"engine_service_ttft_total_ms":233,"engine_iapi_tbt_across_engine_calls_ms":377,"engine_service_tbt_across_engine_calls_ms":399}}"#
.into(),
))));
manager.record_websocket_event(&ws_timing_response, Duration::from_millis(20));
@ -98,6 +98,10 @@ fn runtime_metrics_summary_collects_tool_api_and_streaming_metrics() -> Result<(
},
responses_api_overhead_ms: 124,
responses_api_inference_time_ms: 457,
responses_api_engine_iapi_ttft_ms: 211,
responses_api_engine_service_ttft_ms: 233,
responses_api_engine_iapi_tbt_ms: 377,
responses_api_engine_service_tbt_ms: 399,
};
assert_eq!(summary, expected);

View file

@ -105,6 +105,7 @@ use codex_core::skills::model::SkillMetadata;
#[cfg(target_os = "windows")]
use codex_core::windows_sandbox::WindowsSandboxLevelExt;
use codex_otel::OtelManager;
use codex_otel::RuntimeMetricsSummary;
use codex_protocol::ThreadId;
use codex_protocol::account::PlanType;
use codex_protocol::approvals::ElicitationRequestEvent;
@ -587,7 +588,8 @@ pub(crate) struct ChatWidget {
// This lets the separator show per-chunk work time (since the previous separator) rather than
// the total task-running time reported by the status indicator.
last_separator_elapsed_secs: Option<u64>,
// Runtime metrics accumulated across delta snapshots for the active turn.
turn_runtime_metrics: RuntimeMetricsSummary,
last_rendered_width: std::cell::Cell<Option<usize>>,
// Feedback sink for /feedback
feedback: codex_feedback::CodexFeedback,
@ -931,6 +933,32 @@ impl ChatWidget {
self.request_status_line_branch(cwd);
}
fn collect_runtime_metrics_delta(&mut self) {
if let Some(delta) = self.otel_manager.runtime_metrics_summary() {
self.apply_runtime_metrics_delta(delta);
}
}
fn apply_runtime_metrics_delta(&mut self, delta: RuntimeMetricsSummary) {
let should_log_timing = has_websocket_timing_metrics(delta);
self.turn_runtime_metrics.merge(delta);
if should_log_timing {
self.log_websocket_timing_totals(delta);
}
}
fn log_websocket_timing_totals(&mut self, delta: RuntimeMetricsSummary) {
if let Some(label) = history_cell::runtime_metrics_label(delta.responses_api_summary()) {
self.add_plain_history_lines(vec![
vec!["".dim(), format!("WebSocket timing: {label}").dark_gray()].into(),
]);
}
}
fn refresh_runtime_metrics(&mut self) {
self.collect_runtime_metrics_delta();
}
fn restore_retry_status_header_if_present(&mut self) {
if let Some(header) = self.retry_status_header.take() {
self.set_status_header(header);
@ -1218,6 +1246,7 @@ impl ChatWidget {
self.plan_item_active = false;
self.adaptive_chunking.reset();
self.plan_stream_controller = None;
self.turn_runtime_metrics = RuntimeMetricsSummary::default();
self.otel_manager.reset_runtime_metrics();
self.bottom_pane.clear_quit_shortcut_hint();
self.quit_shortcut_expires_at = None;
@ -1241,17 +1270,25 @@ impl ChatWidget {
}
self.flush_unified_exec_wait_streak();
if !from_replay {
let runtime_metrics = self.otel_manager.runtime_metrics_summary();
if runtime_metrics.is_some() {
let elapsed_seconds = self
.bottom_pane
.status_widget()
.map(super::status_indicator_widget::StatusIndicatorWidget::elapsed_seconds);
self.collect_runtime_metrics_delta();
let runtime_metrics =
(!self.turn_runtime_metrics.is_empty()).then_some(self.turn_runtime_metrics);
let show_work_separator = self.needs_final_message_separator && self.had_work_activity;
if show_work_separator || runtime_metrics.is_some() {
let elapsed_seconds = if show_work_separator {
self.bottom_pane
.status_widget()
.map(super::status_indicator_widget::StatusIndicatorWidget::elapsed_seconds)
.map(|current| self.worked_elapsed_from(current))
} else {
None
};
self.add_to_history(history_cell::FinalMessageSeparator::new(
elapsed_seconds,
runtime_metrics,
));
}
self.turn_runtime_metrics = RuntimeMetricsSummary::default();
self.needs_final_message_separator = false;
self.had_work_activity = false;
self.request_status_line_branch_refresh();
@ -2087,6 +2124,10 @@ impl ChatWidget {
}
self.app_event_tx.send(AppEvent::StopCommitAnimation);
}
if self.agent_turn_running {
self.refresh_runtime_metrics();
}
}
fn flush_interrupt_queue(&mut self) {
@ -2529,6 +2570,7 @@ impl ChatWidget {
plan_delta_buffer: String::new(),
plan_item_active: false,
last_separator_elapsed_secs: None,
turn_runtime_metrics: RuntimeMetricsSummary::default(),
last_rendered_width: std::cell::Cell::new(None),
feedback,
feedback_audience,
@ -2690,6 +2732,7 @@ impl ChatWidget {
needs_final_message_separator: false,
had_work_activity: false,
last_separator_elapsed_secs: None,
turn_runtime_metrics: RuntimeMetricsSummary::default(),
last_rendered_width: std::cell::Cell::new(None),
feedback,
feedback_audience,
@ -2840,6 +2883,7 @@ impl ChatWidget {
plan_delta_buffer: String::new(),
plan_item_active: false,
last_separator_elapsed_secs: None,
turn_runtime_metrics: RuntimeMetricsSummary::default(),
last_rendered_width: std::cell::Cell::new(None),
feedback,
feedback_audience,
@ -3859,6 +3903,10 @@ impl ChatWidget {
}
}
}
if !from_replay && self.agent_turn_running {
self.refresh_runtime_metrics();
}
}
fn on_entered_review_mode(&mut self, review: ReviewRequest, from_replay: bool) {
@ -6690,6 +6738,15 @@ impl ChatWidget {
}
}
fn has_websocket_timing_metrics(summary: RuntimeMetricsSummary) -> bool {
summary.responses_api_overhead_ms > 0
|| summary.responses_api_inference_time_ms > 0
|| summary.responses_api_engine_iapi_ttft_ms > 0
|| summary.responses_api_engine_service_ttft_ms > 0
|| summary.responses_api_engine_iapi_tbt_ms > 0
|| summary.responses_api_engine_service_tbt_ms > 0
}
impl Drop for ChatWidget {
fn drop(&mut self) {
self.stop_rate_limit_poller();

View file

@ -62,6 +62,7 @@ use codex_core::protocol::UndoStartedEvent;
use codex_core::protocol::ViewImageToolCallEvent;
use codex_core::protocol::WarningEvent;
use codex_otel::OtelManager;
use codex_otel::RuntimeMetricsSummary;
use codex_protocol::ThreadId;
use codex_protocol::account::PlanType;
use codex_protocol::config_types::CollaborationMode;
@ -955,6 +956,7 @@ async fn make_chatwidget_manual(
plan_delta_buffer: String::new(),
plan_item_active: false,
last_separator_elapsed_secs: None,
turn_runtime_metrics: RuntimeMetricsSummary::default(),
last_rendered_width: std::cell::Cell::new(None),
feedback: codex_feedback::CodexFeedback::new(),
feedback_audience: FeedbackAudience::External,
@ -5029,6 +5031,50 @@ async fn stream_recovery_restores_previous_status_header() {
assert!(chat.retry_status_header.is_none());
}
#[tokio::test]
async fn runtime_metrics_websocket_timing_logs_and_final_separator_sums_totals() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
chat.set_feature_enabled(Feature::RuntimeMetrics, true);
chat.on_task_started();
chat.apply_runtime_metrics_delta(RuntimeMetricsSummary {
responses_api_engine_iapi_ttft_ms: 120,
responses_api_engine_service_tbt_ms: 50,
..RuntimeMetricsSummary::default()
});
let first_log = drain_insert_history(&mut rx)
.iter()
.map(|lines| lines_to_single_string(lines))
.find(|line| line.contains("WebSocket timing:"))
.expect("expected websocket timing log");
assert!(first_log.contains("TTFT: 120ms (iapi)"));
assert!(first_log.contains("TBT: 50ms (service)"));
chat.apply_runtime_metrics_delta(RuntimeMetricsSummary {
responses_api_engine_iapi_ttft_ms: 80,
..RuntimeMetricsSummary::default()
});
let second_log = drain_insert_history(&mut rx)
.iter()
.map(|lines| lines_to_single_string(lines))
.find(|line| line.contains("WebSocket timing:"))
.expect("expected websocket timing log");
assert!(second_log.contains("TTFT: 80ms (iapi)"));
chat.on_task_complete(None, false);
let mut final_separator = None;
while let Ok(event) = rx.try_recv() {
if let AppEvent::InsertHistoryCell(cell) = event {
final_separator = Some(lines_to_single_string(&cell.display_lines(300)));
}
}
let final_separator = final_separator.expect("expected final separator with runtime metrics");
assert!(final_separator.contains("TTFT: 80ms (iapi)"));
assert!(final_separator.contains("TBT: 50ms (service)"));
}
#[tokio::test]
async fn multiple_agent_messages_in_single_turn_emit_multiple_headers() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;

View file

@ -2172,7 +2172,7 @@ impl HistoryCell for FinalMessageSeparator {
}
}
fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option<String> {
pub(crate) fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option<String> {
let mut parts = Vec::new();
if summary.tool_calls.count > 0 {
let duration = format_duration_ms(summary.tool_calls.duration_ms);
@ -2221,6 +2221,34 @@ fn runtime_metrics_label(summary: RuntimeMetricsSummary) -> Option<String> {
let duration = format_duration_ms(summary.responses_api_inference_time_ms);
parts.push(format!("Responses API inference: {duration}"));
}
if summary.responses_api_engine_iapi_ttft_ms > 0
|| summary.responses_api_engine_service_ttft_ms > 0
{
let mut ttft_parts = Vec::new();
if summary.responses_api_engine_iapi_ttft_ms > 0 {
let duration = format_duration_ms(summary.responses_api_engine_iapi_ttft_ms);
ttft_parts.push(format!("{duration} (iapi)"));
}
if summary.responses_api_engine_service_ttft_ms > 0 {
let duration = format_duration_ms(summary.responses_api_engine_service_ttft_ms);
ttft_parts.push(format!("{duration} (service)"));
}
parts.push(format!("TTFT: {}", ttft_parts.join(" ")));
}
if summary.responses_api_engine_iapi_tbt_ms > 0
|| summary.responses_api_engine_service_tbt_ms > 0
{
let mut tbt_parts = Vec::new();
if summary.responses_api_engine_iapi_tbt_ms > 0 {
let duration = format_duration_ms(summary.responses_api_engine_iapi_tbt_ms);
tbt_parts.push(format!("{duration} (iapi)"));
}
if summary.responses_api_engine_service_tbt_ms > 0 {
let duration = format_duration_ms(summary.responses_api_engine_service_tbt_ms);
tbt_parts.push(format!("{duration} (service)"));
}
parts.push(format!("TBT: {}", tbt_parts.join(" ")));
}
if parts.is_empty() {
None
} else {
@ -2391,9 +2419,13 @@ mod tests {
},
responses_api_overhead_ms: 650,
responses_api_inference_time_ms: 1_940,
responses_api_engine_iapi_ttft_ms: 410,
responses_api_engine_service_ttft_ms: 460,
responses_api_engine_iapi_tbt_ms: 1_180,
responses_api_engine_service_tbt_ms: 1_240,
};
let cell = FinalMessageSeparator::new(Some(12), Some(summary));
let rendered = render_lines(&cell.display_lines(300));
let rendered = render_lines(&cell.display_lines(600));
assert_eq!(rendered.len(), 1);
assert!(!rendered[0].contains("Worked for"));
@ -2404,6 +2436,8 @@ mod tests {
assert!(rendered[0].contains("4 events received (1.2s)"));
assert!(rendered[0].contains("Responses API overhead: 650ms"));
assert!(rendered[0].contains("Responses API inference: 1.9s"));
assert!(rendered[0].contains("TTFT: 410ms (iapi) 460ms (service)"));
assert!(rendered[0].contains("TBT: 1.2s (iapi) 1.2s (service)"));
}
#[test]