core: add focused diagnostics for remote compaction overflows (#11133)

## Summary
- add targeted remote-compaction failure diagnostics to compact_remote logging
- log the specific values needed to explain overflow timing:
  - last_api_response_total_tokens
  - estimated_tokens_of_items_added_since_last_successful_api_response
  - estimated_bytes_of_items_added_since_last_successful_api_response
  - failing_compaction_request_body_bytes
- simplify breakdown naming and remove
last_api_response_total_bytes_estimate (it was an approximation and not
useful for debugging)

## Why
When compaction fails with context_length_exceeded, we need concrete,
unambiguous numbers that map directly to:
1) what the API most recently reported, and
2) what local history has added since then.

This keeps the failure logs actionable without adding broad, noisy
metrics.
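
For intuition, the two halves are meant to add up: a hedged sketch of the
overflow arithmetic these fields support (the helper below is illustrative,
not code from this change):

```rust
// Illustrative only: shows how the logged fields combine to explain an
// overflow. The names mirror the log fields; the function is hypothetical.
fn overflow_is_explained(
    last_api_response_total_tokens: i64,
    estimated_tokens_of_items_added_since_last_successful_api_response: i64,
    model_context_window_tokens: i64,
) -> bool {
    last_api_response_total_tokens
        .saturating_add(estimated_tokens_of_items_added_since_last_successful_api_response)
        >= model_context_window_tokens
}
```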

## Testing
- `just fmt`
- `cargo test -p codex-core`
Charley Cunningham 2026-02-09 12:42:20 -08:00 committed by GitHub
parent f88667042e
commit 9450cd9ce5
6 changed files with 150 additions and 29 deletions

View file

@@ -114,6 +114,7 @@ use crate::config::resolve_web_search_mode_for_turn;
use crate::config::types::McpServerConfig;
use crate::config::types::ShellEnvironmentPolicy;
use crate::context_manager::ContextManager;
use crate::context_manager::TotalTokenUsageBreakdown;
use crate::environment_context::EnvironmentContext;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
@@ -1231,12 +1232,20 @@ impl Session {
format!("auto-compact-{id}")
}
async fn get_total_token_usage(&self) -> i64 {
pub(crate) async fn get_total_token_usage(&self) -> i64 {
let state = self.state.lock().await;
state.get_total_token_usage(state.server_reasoning_included())
}
async fn get_estimated_token_count(&self, turn_context: &TurnContext) -> Option<i64> {
pub(crate) async fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
let state = self.state.lock().await;
state.history.get_total_token_usage_breakdown()
}
pub(crate) async fn get_estimated_token_count(
&self,
turn_context: &TurnContext,
) -> Option<i64> {
let state = self.state.lock().await;
state.history.estimate_token_count(turn_context)
}
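
With these accessors widened to pub(crate), other crate-internal tasks can
snapshot usage around compaction. A minimal hypothetical caller (the wrapper
itself is illustrative; only the accessor names come from this diff):

```rust
// Hypothetical crate-internal helper. `Session` and `TurnContext` are the
// types from this diff; the function itself is a sketch, not committed code.
async fn snapshot_usage(sess: &Session, turn_context: &TurnContext) -> (i64, Option<i64>) {
    // API-reported total plus local estimates since the last response.
    let total = sess.get_total_token_usage().await;
    // Purely local estimate over the whole history, when one can be computed.
    let estimated = sess.get_estimated_token_count(turn_context).await;
    (total, estimated)
}
```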

View file

@@ -4,7 +4,10 @@ use crate::Prompt;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::context_manager::ContextManager;
use crate::context_manager::TotalTokenUsageBreakdown;
use crate::context_manager::estimate_response_item_model_visible_bytes;
use crate::context_manager::is_codex_generated_item;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::protocol::CompactedItem;
use crate::protocol::EventMsg;
@@ -14,6 +17,8 @@ use codex_protocol::items::ContextCompactionItem;
use codex_protocol::items::TurnItem;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ResponseItem;
use futures::TryFutureExt;
use tracing::error;
use tracing::info;
pub(crate) async fn run_inline_remote_auto_compact_task(
@@ -98,6 +103,18 @@ async fn run_remote_compact_task_inner_impl(
&turn_context.model_info,
&turn_context.otel_manager,
)
.or_else(|err| async {
let total_usage_breakdown = sess.get_total_token_usage_breakdown().await;
let compact_request_log_data =
build_compact_request_log_data(&prompt.input, &prompt.base_instructions.text);
log_remote_compact_failure(
turn_context,
&compact_request_log_data,
total_usage_breakdown,
&err,
);
Err(err)
})
.await?;
new_history = sess
.process_compacted_history(turn_context, new_history)
@@ -121,6 +138,47 @@ async fn run_remote_compact_task_inner_impl(
Ok(())
}
#[derive(Debug)]
struct CompactRequestLogData {
failing_compaction_request_model_visible_bytes: i64,
}
fn build_compact_request_log_data(
input: &[ResponseItem],
instructions: &str,
) -> CompactRequestLogData {
let failing_compaction_request_model_visible_bytes = input
.iter()
.map(estimate_response_item_model_visible_bytes)
.fold(
i64::try_from(instructions.len()).unwrap_or(i64::MAX),
i64::saturating_add,
);
CompactRequestLogData {
failing_compaction_request_model_visible_bytes,
}
}
fn log_remote_compact_failure(
turn_context: &TurnContext,
log_data: &CompactRequestLogData,
total_usage_breakdown: TotalTokenUsageBreakdown,
err: &CodexErr,
) {
error!(
turn_id = %turn_context.sub_id,
last_api_response_total_tokens = total_usage_breakdown.last_api_response_total_tokens,
all_history_items_model_visible_bytes = total_usage_breakdown.all_history_items_model_visible_bytes,
estimated_tokens_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_tokens_of_items_added_since_last_successful_api_response,
estimated_bytes_of_items_added_since_last_successful_api_response = total_usage_breakdown.estimated_bytes_of_items_added_since_last_successful_api_response,
model_context_window_tokens = ?turn_context.model_context_window(),
failing_compaction_request_model_visible_bytes = log_data.failing_compaction_request_model_visible_bytes,
compact_error = %err,
"remote compaction failed"
);
}
fn trim_function_call_history_to_fit_context_window(
history: &mut ContextManager,
turn_context: &TurnContext,
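
The `or_else` chaining earlier in this file comes from `futures::TryFutureExt`:
the closure observes the error, performs the logging side effect, and returns
the error unchanged so the trailing `?` still propagates the original failure.
A self-contained sketch of the same shape (names here are illustrative):

```rust
use futures::TryFutureExt;

// Minimal log-and-rethrow shape: the error is logged, then returned as-is.
async fn fallible() -> Result<u32, String> {
    Err("context_length_exceeded".to_string())
}

async fn fallible_with_logging() -> Result<u32, String> {
    fallible()
        .or_else(|err| async move {
            eprintln!("request failed: {err}");
            Err(err)
        })
        .await
}
```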

View file

@@ -5,7 +5,7 @@ use crate::instructions::UserInstructions;
use crate::session_prefix::is_session_prefix;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::approx_tokens_from_byte_count;
use crate::truncate::approx_tokens_from_byte_count_i64;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::truncate::truncate_text;
use crate::user_shell_command::is_user_shell_command_text;
@@ -27,6 +27,14 @@ pub(crate) struct ContextManager {
token_info: Option<TokenUsageInfo>,
}
#[derive(Debug, Clone, Copy, Default)]
pub(crate) struct TotalTokenUsageBreakdown {
pub last_api_response_total_tokens: i64,
pub all_history_items_model_visible_bytes: i64,
pub estimated_tokens_of_items_added_since_last_successful_api_response: i64,
pub estimated_bytes_of_items_added_since_last_successful_api_response: i64,
}
impl ContextManager {
pub(crate) fn new() -> Self {
Self {
@@ -102,9 +110,11 @@ impl ContextManager {
let base_tokens =
i64::try_from(approx_token_count(&base_instructions.text)).unwrap_or(i64::MAX);
let items_tokens = self.items.iter().fold(0i64, |acc, item| {
acc.saturating_add(estimate_item_token_count(item))
});
let items_tokens = self
.items
.iter()
.map(estimate_item_token_count)
.fold(0i64, i64::saturating_add);
Some(base_tokens.saturating_add(items_tokens))
}
@@ -231,9 +241,8 @@ impl ContextManager {
}
)
})
.fold(0i64, |acc, item| {
acc.saturating_add(estimate_item_token_count(item))
})
.map(estimate_item_token_count)
.fold(0i64, i64::saturating_add)
}
// These are local items added after the most recent model-emitted item.
@@ -247,14 +256,6 @@ impl ContextManager {
&self.items[start..]
}
fn get_items_after_last_model_generated_tokens(&self) -> i64 {
self.items_after_last_model_generated_item()
.iter()
.fold(0i64, |acc, item| {
acc.saturating_add(estimate_item_token_count(item))
})
}
/// When true, the server already accounted for past reasoning tokens and
/// the client should not re-estimate them.
pub(crate) fn get_total_token_usage(&self, server_reasoning_included: bool) -> i64 {
@@ -263,8 +264,11 @@
.as_ref()
.map(|info| info.last_token_usage.total_tokens)
.unwrap_or(0);
let items_after_last_model_generated_tokens =
self.get_items_after_last_model_generated_tokens();
let items_after_last_model_generated_tokens = self
.items_after_last_model_generated_item()
.iter()
.map(estimate_item_token_count)
.fold(0i64, i64::saturating_add);
if server_reasoning_included {
last_tokens.saturating_add(items_after_last_model_generated_tokens)
} else {
@@ -274,6 +278,34 @@
}
}
pub(crate) fn get_total_token_usage_breakdown(&self) -> TotalTokenUsageBreakdown {
let last_usage = self
.token_info
.as_ref()
.map(|info| info.last_token_usage.clone())
.unwrap_or_default();
let items_after_last_model_generated = self.items_after_last_model_generated_item();
TotalTokenUsageBreakdown {
last_api_response_total_tokens: last_usage.total_tokens,
all_history_items_model_visible_bytes: self
.items
.iter()
.map(estimate_response_item_model_visible_bytes)
.fold(0i64, i64::saturating_add),
estimated_tokens_of_items_added_since_last_successful_api_response:
items_after_last_model_generated
.iter()
.map(estimate_item_token_count)
.fold(0i64, i64::saturating_add),
estimated_bytes_of_items_added_since_last_successful_api_response:
items_after_last_model_generated
.iter()
.map(estimate_response_item_model_visible_bytes)
.fold(0i64, i64::saturating_add),
}
}
/// This function enforces a couple of invariants on the in-memory history:
/// 1. every call (function/custom) has a corresponding output entry
/// 2. every output has a corresponding call entry
@@ -357,6 +389,11 @@ fn estimate_reasoning_length(encoded_len: usize) -> usize {
}
fn estimate_item_token_count(item: &ResponseItem) -> i64 {
let model_visible_bytes = estimate_response_item_model_visible_bytes(item);
approx_tokens_from_byte_count_i64(model_visible_bytes)
}
pub(crate) fn estimate_response_item_model_visible_bytes(item: &ResponseItem) -> i64 {
match item {
ResponseItem::GhostSnapshot { .. } => 0,
ResponseItem::Reasoning {
@@ -365,14 +402,10 @@ fn estimate_item_token_count(item: &ResponseItem) -> i64 {
}
| ResponseItem::Compaction {
encrypted_content: content,
} => {
let reasoning_bytes = estimate_reasoning_length(content.len());
i64::try_from(approx_tokens_from_byte_count(reasoning_bytes)).unwrap_or(i64::MAX)
}
item => {
let serialized = serde_json::to_string(item).unwrap_or_default();
i64::try_from(approx_token_count(&serialized)).unwrap_or(i64::MAX)
}
} => i64::try_from(estimate_reasoning_length(content.len())).unwrap_or(i64::MAX),
item => serde_json::to_string(item)
.map(|serialized| i64::try_from(serialized.len()).unwrap_or(i64::MAX))
.unwrap_or_default(),
}
}
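
After this refactor, `estimate_response_item_model_visible_bytes` reports raw
bytes and `estimate_item_token_count` converts them via
`approx_tokens_from_byte_count_i64`, so one pass over history can feed both the
byte and token fields of the breakdown. A rough illustration of that conversion
(the bytes-per-token ratio below is an assumption; the real constant lives in
truncate.rs and is not shown in this diff):

```rust
// Assumed ratio for illustration only; the crate's APPROX_BYTES_PER_TOKEN
// is defined in truncate.rs and may differ.
const ASSUMED_BYTES_PER_TOKEN: i64 = 4;

fn illustrative_tokens_from_bytes(model_visible_bytes: i64) -> i64 {
    if model_visible_bytes <= 0 {
        return 0;
    }
    model_visible_bytes / ASSUMED_BYTES_PER_TOKEN
}

fn main() {
    // A 1 KiB serialized item estimates to roughly 256 tokens at this ratio.
    assert_eq!(illustrative_tokens_from_bytes(1024), 256);
}
```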

View file

@@ -193,7 +193,11 @@ fn items_after_last_model_generated_tokens_include_user_and_tool_output() {
);
assert_eq!(
history.get_items_after_last_model_generated_tokens(),
history
.items_after_last_model_generated_item()
.iter()
.map(estimate_item_token_count)
.fold(0i64, i64::saturating_add),
expected_tokens
);
}
@@ -202,7 +206,14 @@
fn items_after_last_model_generated_tokens_are_zero_without_model_generated_items() {
let history = create_history_with_items(vec![user_msg("no model output yet")]);
assert_eq!(history.get_items_after_last_model_generated_tokens(), 0);
assert_eq!(
history
.items_after_last_model_generated_item()
.iter()
.map(estimate_item_token_count)
.fold(0i64, i64::saturating_add),
0
);
}
#[test]

View file

@@ -2,5 +2,7 @@ mod history;
mod normalize;
pub(crate) use history::ContextManager;
pub(crate) use history::TotalTokenUsageBreakdown;
pub(crate) use history::estimate_response_item_model_visible_bytes;
pub(crate) use history::is_codex_generated_item;
pub(crate) use history::is_user_turn_boundary;

View file

@@ -304,6 +304,14 @@ pub(crate) fn approx_tokens_from_byte_count(bytes: usize) -> u64 {
/ (APPROX_BYTES_PER_TOKEN as u64)
}
pub(crate) fn approx_tokens_from_byte_count_i64(bytes: i64) -> i64 {
if bytes <= 0 {
return 0;
}
let bytes = usize::try_from(bytes).unwrap_or(usize::MAX);
i64::try_from(approx_tokens_from_byte_count(bytes)).unwrap_or(i64::MAX)
}
#[cfg(test)]
mod tests {
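
The new conversion helper clamps at both ends: non-positive byte counts map to
zero tokens, and values beyond `usize` or `i64` saturate rather than wrap. A
hedged test sketch of that contract (the test name is illustrative and not
part of this diff):

```rust
// Illustrative test for the clamping behavior visible in the diff above.
#[test]
fn approx_tokens_from_byte_count_i64_clamps_non_positive_bytes() {
    assert_eq!(approx_tokens_from_byte_count_i64(0), 0);
    assert_eq!(approx_tokens_from_byte_count_i64(-1), 0);
}
```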