From 9450cd9ce510ee07fdb80a3bb4782cb303803f4b Mon Sep 17 00:00:00 2001 From: Charley Cunningham Date: Mon, 9 Feb 2026 12:42:20 -0800 Subject: [PATCH] core: add focused diagnostics for remote compaction overflows (#11133) ## Summary - add targeted remote-compaction failure diagnostics in compact_remote logging - log the specific values needed to explain overflow timing: - last_api_response_total_tokens - estimated_tokens_of_items_added_since_last_successful_api_response - estimated_bytes_of_items_added_since_last_successful_api_response - failing_compaction_request_body_bytes - simplify breakdown naming and remove last_api_response_total_bytes_estimate (it was an approximation and not useful for debugging) ## Why When compaction fails with context_length_exceeded, we need concrete, low-ambiguity numbers that map directly to: 1) what the API most recently reported, and 2) what local history added since then. This keeps the failure logs actionable without adding broad, noisy metrics. ## Testing - just fmt - cargo test -p codex-core --- codex-rs/core/src/codex.rs | 13 ++- codex-rs/core/src/compact_remote.rs | 58 +++++++++++++ codex-rs/core/src/context_manager/history.rs | 83 +++++++++++++------ .../core/src/context_manager/history_tests.rs | 15 +++- codex-rs/core/src/context_manager/mod.rs | 2 + codex-rs/core/src/truncate.rs | 8 ++ 6 files changed, 150 insertions(+), 29 deletions(-) diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index a6ce928b4..ceb8fcf94 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -114,6 +114,7 @@ use crate::config::resolve_web_search_mode_for_turn; use crate::config::types::McpServerConfig; use crate::config::types::ShellEnvironmentPolicy; use crate::context_manager::ContextManager; +use crate::context_manager::TotalTokenUsageBreakdown; use crate::environment_context::EnvironmentContext; use crate::error::CodexErr; use crate::error::Result as CodexResult; @@ -1231,12 +1232,20 @@ impl Session { format!("auto-compact-{id}") } - async fn get_total_token_usage(&self) -> i64 { + pub(crate) async fn get_total_token_usage(&self) -> i64 { let state = self.state.lock().await; state.get_total_token_usage(state.server_reasoning_included()) } - async fn get_estimated_token_count(&self, turn_context: &TurnContext) -> Option { + pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown { + let state = self.state.lock().await; + state.history.get_total_token_usage_breakdown() + } + + pub(crate) async fn get_estimated_token_count( + &self, + turn_context: &TurnContext, + ) -> Option { let state = self.state.lock().await; state.history.estimate_token_count(turn_context) } diff --git a/codex-rs/core/src/compact_remote.rs b/codex-rs/core/src/compact_remote.rs index eac3188cd..8c7808e3f 100644 --- a/codex-rs/core/src/compact_remote.rs +++ b/codex-rs/core/src/compact_remote.rs @@ -4,7 +4,10 @@ use crate::Prompt; use crate::codex::Session; use crate::codex::TurnContext; use crate::context_manager::ContextManager; +use crate::context_manager::TotalTokenUsageBreakdown; +use crate::context_manager::estimate_response_item_model_visible_bytes; use crate::context_manager::is_codex_generated_item; +use crate::error::CodexErr; use crate::error::Result as CodexResult; use crate::protocol::CompactedItem; use crate::protocol::EventMsg; @@ -14,6 +17,8 @@ use codex_protocol::items::ContextCompactionItem; use codex_protocol::items::TurnItem; use codex_protocol::models::BaseInstructions; use codex_protocol::models::ResponseItem; +use futures::TryFutureExt; +use tracing::error; use tracing::info; pub(crate) async fn run_inline_remote_auto_compact_task( @@ -98,6 +103,18 @@ async fn run_remote_compact_task_inner_impl( &turn_context.model_info, &turn_context.otel_manager, ) + .or_else(|err| async { + let total_usage_breakdown = sess.get_total_token_usage_breakdown().await; + let compact_request_log_data = + build_compact_request_log_data(&prompt.input, &prompt.base_instructions.text); + log_remote_compact_failure( + turn_context, + &compact_request_log_data, + total_usage_breakdown, + &err, + ); + Err(err) + }) .await?; new_history = sess .process_compacted_history(turn_context, new_history) @@ -121,6 +138,47 @@ async fn run_remote_compact_task_inner_impl( Ok(()) } +#[derive(Debug)] +struct CompactRequestLogData { + failing_compaction_request_model_visible_bytes: i64, +} + +fn build_compact_request_log_data( + input: &[ResponseItem], + instructions: &str, +) -> CompactRequestLogData { + let failing_compaction_request_model_visible_bytes = input + .iter() + .map(estimate_response_item_model_visible_bytes) + .fold( + i64::try_from(instructions.len()).unwrap_or(i64::MAX), + i64::saturating_add, + ); + + CompactRequestLogData { + failing_compaction_request_model_visible_bytes, + } +} + +fn log_remote_compact_failure( + turn_context: &TurnContext, + log_data: &CompactRequestLogData, + total_usage_breakdown: TotalTokenUsageBreakdown, + err: &CodexErr, +) { + error!( + turn_id = %turn_context.sub_id, + last_api_response_total_tokens = total_usage_breakdown.last_api_response_total_tokens, + all_history_items_model_visible_bytes = total_usage_breakdown.all_history_items_model_visible_bytes, + estimated_tokens_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_tokens_of_items_added_since_last_successful_api_response, + estimated_bytes_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_bytes_of_items_added_since_last_successful_api_response, + model_context_window_tokens = ?turn_context.model_context_window(), + failing_compaction_request_model_visible_bytes = log_data.failing_compaction_request_model_visible_bytes, + compact_error = %err, + "remote compaction failed" + ); +} + fn trim_function_call_history_to_fit_context_window( history: &mut ContextManager, turn_context: &TurnContext, diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 4bf62d791..a5b223805 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -5,7 +5,7 @@ use crate::instructions::UserInstructions; use crate::session_prefix::is_session_prefix; use crate::truncate::TruncationPolicy; use crate::truncate::approx_token_count; -use crate::truncate::approx_tokens_from_byte_count; +use crate::truncate::approx_tokens_from_byte_count_i64; use crate::truncate::truncate_function_output_items_with_policy; use crate::truncate::truncate_text; use crate::user_shell_command::is_user_shell_command_text; @@ -27,6 +27,14 @@ pub(crate) struct ContextManager { token_info: Option, } +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct TotalTokenUsageBreakdown { + pub last_api_response_total_tokens: i64, + pub all_history_items_model_visible_bytes: i64, + pub estimated_tokens_of_items_added_since_last_successful_api_response: i64, + pub estimated_bytes_of_items_added_since_last_successful_api_response: i64, +} + impl ContextManager { pub(crate) fn new() -> Self { Self { @@ -102,9 +110,11 @@ impl ContextManager { let base_tokens = i64::try_from(approx_token_count(&base_instructions.text)).unwrap_or(i64::MAX); - let items_tokens = self.items.iter().fold(0i64, |acc, item| { - acc.saturating_add(estimate_item_token_count(item)) - }); + let items_tokens = self + .items + .iter() + .map(estimate_item_token_count) + .fold(0i64, i64::saturating_add); Some(base_tokens.saturating_add(items_tokens)) } @@ -231,9 +241,8 @@ impl ContextManager { } ) }) - .fold(0i64, |acc, item| { - acc.saturating_add(estimate_item_token_count(item)) - }) + .map(estimate_item_token_count) + .fold(0i64, i64::saturating_add) } // These are local items added after the most recent model-emitted item. @@ -247,14 +256,6 @@ impl ContextManager { &self.items[start..] } - fn get_items_after_last_model_generated_tokens(&self) -> i64 { - self.items_after_last_model_generated_item() - .iter() - .fold(0i64, |acc, item| { - acc.saturating_add(estimate_item_token_count(item)) - }) - } - /// When true, the server already accounted for past reasoning tokens and /// the client should not re-estimate them. pub(crate) fn get_total_token_usage(&self, server_reasoning_included: bool) -> i64 { @@ -263,8 +264,11 @@ impl ContextManager { .as_ref() .map(|info| info.last_token_usage.total_tokens) .unwrap_or(0); - let items_after_last_model_generated_tokens = - self.get_items_after_last_model_generated_tokens(); + let items_after_last_model_generated_tokens = self + .items_after_last_model_generated_item() + .iter() + .map(estimate_item_token_count) + .fold(0i64, i64::saturating_add); if server_reasoning_included { last_tokens.saturating_add(items_after_last_model_generated_tokens) } else { @@ -274,6 +278,34 @@ impl ContextManager { } } + pub(crate) fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown { + let last_usage = self + .token_info + .as_ref() + .map(|info| info.last_token_usage.clone()) + .unwrap_or_default(); + let items_after_last_model_generated = self.items_after_last_model_generated_item(); + + TotalTokenUsageBreakdown { + last_api_response_total_tokens: last_usage.total_tokens, + all_history_items_model_visible_bytes: self + .items + .iter() + .map(estimate_response_item_model_visible_bytes) + .fold(0i64, i64::saturating_add), + estimated_tokens_of_items_added_since_last_successful_api_response: + items_after_last_model_generated + .iter() + .map(estimate_item_token_count) + .fold(0i64, i64::saturating_add), + estimated_bytes_of_items_added_since_last_successful_api_response: + items_after_last_model_generated + .iter() + .map(estimate_response_item_model_visible_bytes) + .fold(0i64, i64::saturating_add), + } + } + /// This function enforces a couple of invariants on the in-memory history: /// 1. every call (function/custom) has a corresponding output entry /// 2. every output has a corresponding call entry @@ -357,6 +389,11 @@ fn estimate_reasoning_length(encoded_len: usize) -> usize { } fn estimate_item_token_count(item: &ResponseItem) -> i64 { + let model_visible_bytes = estimate_response_item_model_visible_bytes(item); + approx_tokens_from_byte_count_i64(model_visible_bytes) +} + +pub(crate) fn estimate_response_item_model_visible_bytes(item: &ResponseItem) -> i64 { match item { ResponseItem::GhostSnapshot { .. } => 0, ResponseItem::Reasoning { @@ -365,14 +402,10 @@ fn estimate_item_token_count(item: &ResponseItem) -> i64 { } | ResponseItem::Compaction { encrypted_content: content, - } => { - let reasoning_bytes = estimate_reasoning_length(content.len()); - i64::try_from(approx_tokens_from_byte_count(reasoning_bytes)).unwrap_or(i64::MAX) - } - item => { - let serialized = serde_json::to_string(item).unwrap_or_default(); - i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX) - } + } => i64::try_from(estimate_reasoning_length(content.len())).unwrap_or(i64::MAX), + item => serde_json::to_string(item) + .map(|serialized| i64::try_from(serialized.len()).unwrap_or(i64::MAX)) + .unwrap_or_default(), } } diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 26bff9f4b..99670c05e 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -193,7 +193,11 @@ fn items_after_last_model_generated_tokens_include_user_and_tool_output() { ); assert_eq!( - history.get_items_after_last_model_generated_tokens(), + history + .items_after_last_model_generated_item() + .iter() + .map(estimate_item_token_count) + .fold(0i64, i64::saturating_add), expected_tokens ); } @@ -202,7 +206,14 @@ fn items_after_last_model_generated_tokens_include_user_and_tool_output() { fn items_after_last_model_generated_tokens_are_zero_without_model_generated_items() { let history = create_history_with_items(vec![user_msg("no model output yet")]); - assert_eq!(history.get_items_after_last_model_generated_tokens(), 0); + assert_eq!( + history + .items_after_last_model_generated_item() + .iter() + .map(estimate_item_token_count) + .fold(0i64, i64::saturating_add), + 0 + ); } #[test] diff --git a/codex-rs/core/src/context_manager/mod.rs b/codex-rs/core/src/context_manager/mod.rs index 22e9682fe..dcaf794c4 100644 --- a/codex-rs/core/src/context_manager/mod.rs +++ b/codex-rs/core/src/context_manager/mod.rs @@ -2,5 +2,7 @@ mod history; mod normalize; pub(crate) use history::ContextManager; +pub(crate) use history::TotalTokenUsageBreakdown; +pub(crate) use history::estimate_response_item_model_visible_bytes; pub(crate) use history::is_codex_generated_item; pub(crate) use history::is_user_turn_boundary; diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 441a15737..309ed91b6 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -304,6 +304,14 @@ pub(crate) fn approx_tokens_from_byte_count(bytes: usize) -> u64 { / (APPROX_BYTES_PER_TOKEN as u64) } +pub(crate) fn approx_tokens_from_byte_count_i64(bytes: i64) -> i64 { + if bytes <= 0 { + return 0; + } + let bytes = usize::try_from(bytes).unwrap_or(usize::MAX); + i64::try_from(approx_tokens_from_byte_count(bytes)).unwrap_or(i64::MAX) +} + #[cfg(test)] mod tests {