From efebc62fb7e9a3cf4a1713a274545f0be5f51639 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Wed, 19 Nov 2025 01:56:08 -0800 Subject: [PATCH] Move shell to use `truncate_text` (#6842) Move shell to use the configurable `truncate_text` --------- Co-authored-by: pakrym-oai --- codex-rs/core/src/client_common.rs | 21 +- codex-rs/core/src/codex.rs | 3 +- codex-rs/core/src/context_manager/history.rs | 16 +- .../core/src/context_manager/history_tests.rs | 92 ++-- codex-rs/core/src/context_manager/mod.rs | 1 - codex-rs/core/src/tasks/user_shell.rs | 28 +- codex-rs/core/src/tools/events.rs | 16 +- codex-rs/core/src/tools/mod.rs | 34 +- codex-rs/core/src/truncate.rs | 475 ++++++------------ codex-rs/core/src/unified_exec/session.rs | 4 +- .../core/src/unified_exec/session_manager.rs | 6 +- codex-rs/core/src/user_shell_command.rs | 30 +- .../core/tests/suite/shell_serialization.rs | 15 +- codex-rs/core/tests/suite/truncation.rs | 319 ++++++++++-- codex-rs/core/tests/suite/unified_exec.rs | 3 +- codex-rs/core/tests/suite/user_shell_cmd.rs | 18 +- 16 files changed, 583 insertions(+), 498 deletions(-) diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index 7e3eb1ac9..3842ea7e9 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -165,11 +165,9 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String { )); let mut output = parsed.output.clone(); - if let Some(total_lines) = extract_total_output_lines(&parsed.output) { + if let Some((stripped, total_lines)) = strip_total_output_header(&parsed.output) { sections.push(format!("Total output lines: {total_lines}")); - if let Some(stripped) = strip_total_output_header(&output) { - output = stripped.to_string(); - } + output = stripped.to_string(); } sections.push("Output:".to_string()); @@ -178,19 +176,12 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String { sections.join("\n") } -fn extract_total_output_lines(output: &str) -> Option { - let marker_start = output.find("[... omitted ")?; - let marker = &output[marker_start..]; - let (_, after_of) = marker.split_once(" of ")?; - let (total_segment, _) = after_of.split_once(' ')?; - total_segment.parse::().ok() -} - -fn strip_total_output_header(output: &str) -> Option<&str> { +fn strip_total_output_header(output: &str) -> Option<(&str, u32)> { let after_prefix = output.strip_prefix("Total output lines: ")?; - let (_, remainder) = after_prefix.split_once('\n')?; + let (total_segment, remainder) = after_prefix.split_once('\n')?; + let total_lines = total_segment.parse::().ok()?; let remainder = remainder.strip_prefix('\n').unwrap_or(remainder); - Some(remainder) + Some((remainder, total_lines)) } #[derive(Debug)] diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 5156ed645..c2f2fbf5d 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -2479,8 +2479,9 @@ mod tests { duration: StdDuration::from_secs(1), timed_out: true, }; + let (_, turn_context) = make_session_and_context(); - let out = format_exec_output_str(&exec); + let out = format_exec_output_str(&exec, turn_context.truncation_policy); assert_eq!( out, diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index af60ada9f..8575fe9f6 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -145,13 +145,17 @@ impl ContextManager { } fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem { + let policy_with_serialization_budget = policy.mul(1.2); match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let truncated = truncate_text(output.content.as_str(), policy); - let truncated_items = output - .content_items - .as_ref() - .map(|items| truncate_function_output_items_with_policy(items, policy)); + let truncated = + truncate_text(output.content.as_str(), policy_with_serialization_budget); + let truncated_items = output.content_items.as_ref().map(|items| { + truncate_function_output_items_with_policy( + items, + policy_with_serialization_budget, + ) + }); ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { @@ -162,7 +166,7 @@ impl ContextManager { } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let truncated = truncate_text(output, policy); + let truncated = truncate_text(output, policy_with_serialization_budget); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), output: truncated, diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index fecd0a727..1a01604a7 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -12,8 +12,8 @@ use codex_protocol::models::ReasoningItemReasoningSummary; use pretty_assertions::assert_eq; use regex_lite::Regex; -const EXEC_FORMAT_MAX_LINES: usize = 256; const EXEC_FORMAT_MAX_BYTES: usize = 10_000; +const EXEC_FORMAT_MAX_TOKENS: usize = 2_500; fn assistant_msg(text: &str) -> ResponseItem { ResponseItem::Message { @@ -56,6 +56,10 @@ fn reasoning_msg(text: &str) -> ResponseItem { } } +fn truncate_exec_output(content: &str) -> String { + truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS)) +} + #[test] fn filters_non_api_messages() { let mut h = ContextManager::default(); @@ -228,7 +232,7 @@ fn normalization_retains_local_shell_outputs() { ResponseItem::FunctionCallOutput { call_id: "shell-1".to_string(), output: FunctionCallOutputPayload { - content: "ok".to_string(), + content: "Total output lines: 1\n\nok".to_string(), ..Default::default() }, }, @@ -330,8 +334,8 @@ fn record_items_respects_custom_token_limit() { assert!(stored.content.contains("tokens truncated")); } -fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) { - let pattern = truncated_message_pattern(line, total_lines); +fn assert_truncated_message_matches(message: &str, line: &str, expected_removed: usize) { + let pattern = truncated_message_pattern(line); let regex = Regex::new(&pattern).unwrap_or_else(|err| { panic!("failed to compile regex {pattern}: {err}"); }); @@ -347,23 +351,18 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz "body exceeds byte limit: {} bytes", body.len() ); + let removed: usize = captures + .name("removed") + .expect("missing removed capture") + .as_str() + .parse() + .unwrap_or_else(|err| panic!("invalid removed tokens: {err}")); + assert_eq!(removed, expected_removed, "mismatched removed token count"); } -fn truncated_message_pattern(line: &str, total_lines: usize) -> String { - let head_lines = EXEC_FORMAT_MAX_LINES / 2; - let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines; - let head_take = head_lines.min(total_lines); - let tail_take = tail_lines.min(total_lines.saturating_sub(head_take)); - let omitted = total_lines.saturating_sub(head_take + tail_take); +fn truncated_message_pattern(line: &str) -> String { let escaped_line = regex_lite::escape(line); - if omitted == 0 { - return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$", - ); - } - format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$", - ) + format!(r"(?s)^(?P{escaped_line}.*?)(?:\r?)?…(?P\d+) tokens truncated…(?:.*)?$") } #[test] @@ -371,27 +370,18 @@ fn format_exec_output_truncates_large_error() { let line = "very long execution error line that should trigger truncation\n"; let large_error = line.repeat(2_500); // way beyond both byte and line limits - let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES); + let truncated = truncate_exec_output(&large_error); - let total_lines = large_error.lines().count(); - assert_truncated_message_matches(&truncated, line, total_lines); + assert_truncated_message_matches(&truncated, line, 36250); assert_ne!(truncated, large_error); } #[test] fn format_exec_output_marks_byte_truncation_without_omitted_lines() { - let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50); - let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES); - + let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 10000); + let truncated = truncate_exec_output(&long_line); assert_ne!(truncated, long_line); - let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES); - let marker_line = format!( - "[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]" - ); - assert!( - truncated.contains(&marker_line), - "missing byte truncation marker: {truncated}" - ); + assert_truncated_message_matches(&truncated, "a", 2500); assert!( !truncated.contains("omitted"), "line omission marker should not appear when no lines were dropped: {truncated}" @@ -401,34 +391,25 @@ fn format_exec_output_marks_byte_truncation_without_omitted_lines() { #[test] fn format_exec_output_returns_original_when_within_limits() { let content = "example output\n".repeat(10); - - assert_eq!( - truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES), - content - ); + assert_eq!(truncate_exec_output(&content), content); } #[test] fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { - let total_lines = EXEC_FORMAT_MAX_LINES + 100; + let total_lines = 2_000; + let filler = "x".repeat(64); let content: String = (0..total_lines) - .map(|idx| format!("line-{idx}\n")) + .map(|idx| format!("line-{idx}-{filler}\n")) .collect(); - let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES); - let omitted = total_lines - EXEC_FORMAT_MAX_LINES; - let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); - + let truncated = truncate_exec_output(&content); + assert_truncated_message_matches(&truncated, "line-0-", 34_723); assert!( - truncated.contains(&expected_marker), - "missing omitted marker: {truncated}" - ); - assert!( - truncated.contains("line-0\n"), + truncated.contains("line-0-"), "expected head line to remain: {truncated}" ); - let last_line = format!("line-{}\n", total_lines - 1); + let last_line = format!("line-{}-", total_lines - 1); assert!( truncated.contains(&last_line), "expected tail line to remain: {truncated}" @@ -437,22 +418,15 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { #[test] fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { - let total_lines = EXEC_FORMAT_MAX_LINES + 42; + let total_lines = 300; let long_line = "x".repeat(256); let content: String = (0..total_lines) .map(|idx| format!("line-{idx}-{long_line}\n")) .collect(); - let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES); + let truncated = truncate_exec_output(&content); - assert!( - truncated.contains("[... omitted 42 of 298 lines ...]"), - "expected omitted marker when line count exceeds limit: {truncated}" - ); - assert!( - !truncated.contains("byte limit"), - "line omission marker should take precedence over byte marker: {truncated}" - ); + assert_truncated_message_matches(&truncated, "line-0-", 17_423); } //TODO(aibrahim): run CI in release mode. diff --git a/codex-rs/core/src/context_manager/mod.rs b/codex-rs/core/src/context_manager/mod.rs index d347a7714..5089b5e8b 100644 --- a/codex-rs/core/src/context_manager/mod.rs +++ b/codex-rs/core/src/context_manager/mod.rs @@ -1,5 +1,4 @@ mod history; mod normalize; -pub(crate) use crate::truncate::truncate_with_line_bytes_budget; pub(crate) use history::ContextManager; diff --git a/codex-rs/core/src/tasks/user_shell.rs b/codex-rs/core/src/tasks/user_shell.rs index 465517adb..9644d3678 100644 --- a/codex-rs/core/src/tasks/user_shell.rs +++ b/codex-rs/core/src/tasks/user_shell.rs @@ -122,7 +122,11 @@ impl SessionTask for UserShellCommandTask { duration: Duration::ZERO, timed_out: false, }; - let output_items = [user_shell_command_record_item(&raw_command, &exec_output)]; + let output_items = [user_shell_command_record_item( + &raw_command, + &exec_output, + &turn_context, + )]; session .record_conversation_items(turn_context.as_ref(), &output_items) .await; @@ -164,12 +168,19 @@ impl SessionTask for UserShellCommandTask { aggregated_output: output.aggregated_output.text.clone(), exit_code: output.exit_code, duration: output.duration, - formatted_output: format_exec_output_str(&output), + formatted_output: format_exec_output_str( + &output, + turn_context.truncation_policy, + ), }), ) .await; - let output_items = [user_shell_command_record_item(&raw_command, &output)]; + let output_items = [user_shell_command_record_item( + &raw_command, + &output, + &turn_context, + )]; session .record_conversation_items(turn_context.as_ref(), &output_items) .await; @@ -201,11 +212,18 @@ impl SessionTask for UserShellCommandTask { aggregated_output: exec_output.aggregated_output.text.clone(), exit_code: exec_output.exit_code, duration: exec_output.duration, - formatted_output: format_exec_output_str(&exec_output), + formatted_output: format_exec_output_str( + &exec_output, + turn_context.truncation_policy, + ), }), ) .await; - let output_items = [user_shell_command_record_item(&raw_command, &exec_output)]; + let output_items = [user_shell_command_record_item( + &raw_command, + &exec_output, + &turn_context, + )]; session .record_conversation_items(turn_context.as_ref(), &output_items) .await; diff --git a/codex-rs/core/src/tools/events.rs b/codex-rs/core/src/tools/events.rs index a23148c02..131b8f698 100644 --- a/codex-rs/core/src/tools/events.rs +++ b/codex-rs/core/src/tools/events.rs @@ -242,12 +242,16 @@ impl ToolEmitter { self.emit(ctx, ToolEventStage::Begin).await; } - fn format_exec_output_for_model(&self, output: &ExecToolCallOutput) -> String { + fn format_exec_output_for_model( + &self, + output: &ExecToolCallOutput, + ctx: ToolEventCtx<'_>, + ) -> String { match self { Self::Shell { freeform: true, .. } => { - super::format_exec_output_for_model_freeform(output) + super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy) } - _ => super::format_exec_output_for_model_structured(output), + _ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy), } } @@ -258,7 +262,7 @@ impl ToolEmitter { ) -> Result { let (event, result) = match out { Ok(output) => { - let content = self.format_exec_output_for_model(&output); + let content = self.format_exec_output_for_model(&output, ctx); let exit_code = output.exit_code; let event = ToolEventStage::Success(output); let result = if exit_code == 0 { @@ -270,7 +274,7 @@ impl ToolEmitter { } Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output }))) | Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => { - let response = self.format_exec_output_for_model(&output); + let response = self.format_exec_output_for_model(&output, ctx); let event = ToolEventStage::Failure(ToolEventFailure::Output(*output)); let result = Err(FunctionCallError::RespondToModel(response)); (event, result) @@ -359,7 +363,7 @@ async fn emit_exec_stage( aggregated_output: output.aggregated_output.text.clone(), exit_code: output.exit_code, duration: output.duration, - formatted_output: format_exec_output_str(&output), + formatted_output: format_exec_output_str(&output, ctx.turn.truncation_policy), }; emit_exec_end(ctx, exec_input, exec_result).await; } diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index 9214a4854..c1ef916d7 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -9,9 +9,10 @@ pub mod runtimes; pub mod sandboxing; pub mod spec; -use crate::context_manager::truncate_with_line_bytes_budget; use crate::exec::ExecToolCallOutput; -use crate::truncate::truncate_formatted_exec_output; +use crate::truncate::TruncationPolicy; +use crate::truncate::formatted_truncate_text; +use crate::truncate::truncate_text; pub use router::ToolRouter; use serde::Serialize; @@ -21,12 +22,12 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str = "[... telemetry preview truncated ...]"; -// TODO(aibrahim): migrate shell tool to use truncate text and respect config value -const SHELL_OUTPUT_MAX_BYTES: usize = 10_000; - /// Format the combined exec output for sending back to the model. /// Includes exit code and duration metadata; truncates large bodies safely. -pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) -> String { +pub fn format_exec_output_for_model_structured( + exec_output: &ExecToolCallOutput, + truncation_policy: TruncationPolicy, +) -> String { let ExecToolCallOutput { exit_code, duration, @@ -48,7 +49,7 @@ pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) // round to 1 decimal place let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0; - let formatted_output = format_exec_output_str(exec_output); + let formatted_output = format_exec_output_str(exec_output, truncation_policy); let payload = ExecOutput { output: &formatted_output, @@ -62,18 +63,16 @@ pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) serde_json::to_string(&payload).expect("serialize ExecOutput") } -pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -> String { +pub fn format_exec_output_for_model_freeform( + exec_output: &ExecToolCallOutput, + truncation_policy: TruncationPolicy, +) -> String { // round to 1 decimal place let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0; let total_lines = exec_output.aggregated_output.text.lines().count(); - let formatted_output = truncate_formatted_exec_output( - &exec_output.aggregated_output.text, - total_lines, - SHELL_OUTPUT_MAX_BYTES, - 256, // TODO: to be removed - ); + let formatted_output = truncate_text(&exec_output.aggregated_output.text, truncation_policy); let mut sections = Vec::new(); @@ -89,7 +88,10 @@ pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) - sections.join("\n") } -pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { +pub fn format_exec_output_str( + exec_output: &ExecToolCallOutput, + truncation_policy: TruncationPolicy, +) -> String { let ExecToolCallOutput { aggregated_output, .. } = exec_output; @@ -106,5 +108,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String { }; // Truncate for model consumption before serialization. - truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES) + formatted_truncate_text(&body, truncation_policy) } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index a6b0fa516..6b997ac09 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -2,11 +2,8 @@ //! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation //! used across the core crate. -use codex_protocol::models::FunctionCallOutputContentItem; -use codex_utils_string::take_bytes_at_char_boundary; -use codex_utils_string::take_last_bytes_at_char_boundary; - use crate::config::Config; +use codex_protocol::models::FunctionCallOutputContentItem; const APPROX_BYTES_PER_TOKEN: usize = 4; @@ -17,6 +14,18 @@ pub enum TruncationPolicy { } impl TruncationPolicy { + /// Scale the underlying budget by `multiplier`, rounding up to avoid under-budgeting. + pub fn mul(self, multiplier: f64) -> Self { + match self { + TruncationPolicy::Bytes(bytes) => { + TruncationPolicy::Bytes((bytes as f64 * multiplier).ceil() as usize) + } + TruncationPolicy::Tokens(tokens) => { + TruncationPolicy::Tokens((tokens as f64 * multiplier).ceil() as usize) + } + } + } + pub fn new(config: &Config) -> Self { let config_token_limit = config.tool_output_token_limit; @@ -65,34 +74,20 @@ impl TruncationPolicy { } } -/// Format a block of exec/tool output for model consumption, truncating by -/// lines and bytes while preserving head and tail segments. -pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String { - // TODO(aibrahim): to be removed - let lines_budget = 256; - // Head+tail truncation for the model: show the beginning and end with an elision. - // Clients still receive full streams; only this formatted summary is capped. - let total_lines = content.lines().count(); - if content.len() <= bytes_budget && total_lines <= lines_budget { +pub(crate) fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String { + if content.len() <= policy.byte_budget() { return content.to_string(); } - let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget); - format!("Total output lines: {total_lines}\n\n{output}") + let total_lines = content.lines().count(); + let result = truncate_text(content, policy); + format!("Total output lines: {total_lines}\n\n{result}") } pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String { match policy { - TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate( - content, - bytes, - TruncationSource::Policy(TruncationPolicy::Bytes(bytes)), - ), - TruncationPolicy::Tokens(tokens) => { - let (truncated, _) = truncate_with_token_budget( - content, - tokens, - TruncationSource::Policy(TruncationPolicy::Tokens(tokens)), - ); + TruncationPolicy::Bytes(_) => truncate_with_byte_estimate(content, policy), + TruncationPolicy::Tokens(_) => { + let (truncated, _) = truncate_with_token_budget(content, policy); truncated } } @@ -162,24 +157,18 @@ pub(crate) fn truncate_function_output_items_with_policy( /// preserving the beginning and the end. Returns the possibly truncated string /// and `Some(original_token_count)` if truncation occurred; otherwise returns /// the original string and `None`. -fn truncate_with_token_budget( - s: &str, - max_tokens: usize, - source: TruncationSource, -) -> (String, Option) { +fn truncate_with_token_budget(s: &str, policy: TruncationPolicy) -> (String, Option) { if s.is_empty() { return (String::new(), None); } + let max_tokens = policy.token_budget(); let byte_len = s.len(); - if max_tokens > 0 { - let small_threshold = approx_bytes_for_tokens(max_tokens / 4); - if small_threshold > 0 && byte_len <= small_threshold { - return (s.to_string(), None); - } + if max_tokens > 0 && byte_len <= approx_bytes_for_tokens(max_tokens) { + return (s.to_string(), None); } - let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source); + let truncated = truncate_with_byte_estimate(s, policy); let approx_total_usize = approx_token_count(s); let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX); if truncated == s { @@ -192,14 +181,19 @@ fn truncate_with_token_budget( /// Truncate a string using a byte budget derived from the token budget, without /// performing any real tokenization. This keeps the logic purely byte-based and /// uses a bytes placeholder in the truncated output. -fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String { +fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String { if s.is_empty() { return String::new(); } + let total_chars = s.chars().count(); + let max_bytes = policy.byte_budget(); if max_bytes == 0 { // No budget to show content; just report that everything was truncated. - let marker = format_truncation_marker(source, removed_units_for_source(source, s.len())); + let marker = format_truncation_marker( + policy, + removed_units_for_source(policy, s.len(), total_chars), + ); return marker; } @@ -208,127 +202,32 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour } let total_bytes = s.len(); - let removed_bytes = total_bytes.saturating_sub(max_bytes); - let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes)); - let marker_len = marker.len(); - if marker_len >= max_bytes { - let truncated_marker = truncate_on_boundary(&marker, max_bytes); - return truncated_marker.to_string(); - } - - let keep_budget = max_bytes - marker_len; - let (left_budget, right_budget) = split_budget(keep_budget); + let (left_budget, right_budget) = split_budget(max_bytes); let prefix_end = pick_prefix_end(s, left_budget); let mut suffix_start = pick_suffix_start(s, right_budget); if suffix_start < prefix_end { suffix_start = prefix_end; } - let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker); + let left_chars = s[..prefix_end].chars().count(); + let right_chars = s[suffix_start..].chars().count(); + let removed_chars = total_chars + .saturating_sub(left_chars) + .saturating_sub(right_chars); - if out.len() > max_bytes { - let boundary = truncate_on_boundary(&out, max_bytes); - out.truncate(boundary.len()); - } + let marker = format_truncation_marker( + policy, + removed_units_for_source(policy, total_bytes.saturating_sub(max_bytes), removed_chars), + ); - out + assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker) } -pub(crate) fn truncate_formatted_exec_output( - content: &str, - total_lines: usize, - limit_bytes: usize, - limit_lines: usize, -) -> String { - error_on_double_truncation(content); - let head_lines: usize = limit_lines / 2; - let tail_lines: usize = limit_lines - head_lines; // 128 - let head_bytes: usize = limit_bytes / 2; - let segments: Vec<&str> = content.split_inclusive('\n').collect(); - let head_take = head_lines.min(segments.len()); - let tail_take = tail_lines.min(segments.len().saturating_sub(head_take)); - let omitted = segments.len().saturating_sub(head_take + tail_take); - - let head_slice_end: usize = segments - .iter() - .take(head_take) - .map(|segment| segment.len()) - .sum(); - let tail_slice_start: usize = if tail_take == 0 { - content.len() - } else { - content.len() - - segments - .iter() - .rev() - .take(tail_take) - .map(|segment| segment.len()) - .sum::() - }; - let head_slice = &content[..head_slice_end]; - let tail_slice = &content[tail_slice_start..]; - let truncated_by_bytes = content.len() > limit_bytes; - // this is a bit wrong. We are counting metadata lines and not just shell output lines. - let marker = if omitted > 0 { - let marker_text = format_truncation_marker( - TruncationSource::LineOmission { total_lines }, - u64::try_from(omitted).unwrap_or(u64::MAX), - ); - Some(format!("\n{marker_text}\n\n")) - } else if truncated_by_bytes { - let removed_bytes = - u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX); - let marker_text = - format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes); - Some(format!("\n{marker_text}\n\n")) - } else { - None - }; - - let marker_len = marker.as_ref().map_or(0, String::len); - let base_head_budget = head_bytes.min(limit_bytes); - let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len)); - let head_part = take_bytes_at_char_boundary(head_slice, head_budget); - let mut result = String::with_capacity(limit_bytes.min(content.len())); - - result.push_str(head_part); - if let Some(marker_text) = marker.as_ref() { - result.push_str(marker_text); - } - - let remaining = limit_bytes.saturating_sub(result.len()); - if remaining == 0 { - return result; - } - - let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining); - result.push_str(tail_part); - - result -} - -#[derive(Clone, Copy)] -pub enum TruncationSource { - Policy(TruncationPolicy), - LineOmission { total_lines: usize }, - ByteLimit { limit_bytes: usize }, -} - -fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String { - match source { - TruncationSource::Policy(TruncationPolicy::Tokens(_)) => { - format!("[…{removed_count} tokens truncated…]") - } - TruncationSource::Policy(TruncationPolicy::Bytes(_)) => { - format!("[…{removed_count} bytes truncated…]") - } - TruncationSource::LineOmission { total_lines } => { - format!("[... omitted {removed_count} of {total_lines} lines ...]") - } - TruncationSource::ByteLimit { limit_bytes } => { - format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]") - } +fn format_truncation_marker(policy: TruncationPolicy, removed_count: u64) -> String { + match policy { + TruncationPolicy::Tokens(_) => format!("…{removed_count} tokens truncated…"), + TruncationPolicy::Bytes(_) => format!("…{removed_count} chars truncated…"), } } @@ -337,12 +236,14 @@ fn split_budget(budget: usize) -> (usize, usize) { (left, budget - left) } -fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 { - match source { - TruncationSource::Policy(TruncationPolicy::Tokens(_)) => { - approx_tokens_from_byte_count(removed_bytes) - } - _ => u64::try_from(removed_bytes).unwrap_or(u64::MAX), +fn removed_units_for_source( + policy: TruncationPolicy, + removed_bytes: usize, + removed_chars: usize, +) -> u64 { + match policy { + TruncationPolicy::Tokens(_) => approx_tokens_from_byte_count(removed_bytes), + TruncationPolicy::Bytes(_) => u64::try_from(removed_chars).unwrap_or(u64::MAX), } } @@ -350,7 +251,6 @@ fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1); out.push_str(prefix); out.push_str(marker); - out.push('\n'); out.push_str(suffix); out } @@ -405,203 +305,130 @@ fn pick_suffix_start(s: &str, right_budget: usize) -> usize { idx } -fn error_on_double_truncation(content: &str) { - if content.contains("Total output lines:") && content.contains("omitted") { - tracing::error!( - "FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}" - ); - } -} - #[cfg(test)] mod tests { - use crate::config::OPENAI_DEFAULT_MODEL; - use crate::model_family::derive_default_model_family; - use crate::model_family::find_family_for_model; use super::TruncationPolicy; - use super::TruncationSource; use super::approx_token_count; + use super::formatted_truncate_text; use super::truncate_function_output_items_with_policy; - use super::truncate_with_line_bytes_budget; + use super::truncate_text; use super::truncate_with_token_budget; use codex_protocol::models::FunctionCallOutputContentItem; use pretty_assertions::assert_eq; - use regex_lite::Regex; - const MODEL_FORMAT_MAX_LINES: usize = 256; + #[test] + fn truncate_bytes_less_than_placeholder_returns_placeholder() { + let content = "example output"; - fn model_format_max_bytes() -> usize { - find_family_for_model(OPENAI_DEFAULT_MODEL) - .unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL)) - .truncation_policy - .byte_budget() - } - - fn truncated_message_pattern(line: &str, total_lines: usize) -> String { - let head_lines = MODEL_FORMAT_MAX_LINES / 2; - let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines; - let head_take = head_lines.min(total_lines); - let tail_take = tail_lines.min(total_lines.saturating_sub(head_take)); - let omitted = total_lines.saturating_sub(head_take + tail_take); - let escaped_line = regex_lite::escape(line); - if omitted == 0 { - return format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$", - max_bytes = model_format_max_bytes(), - ); - } - format!( - r"(?s)^Total output lines: {total_lines}\n\n(?P{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$", - ) + assert_eq!( + "Total output lines: 1\n\n…13 chars truncated…t", + formatted_truncate_text(content, TruncationPolicy::Bytes(1)), + ); } #[test] - fn truncate_middle_returns_original_when_under_limit() { + fn truncate_tokens_less_than_placeholder_returns_placeholder() { + let content = "example output"; + + assert_eq!( + "Total output lines: 1\n\nex…3 tokens truncated…ut", + formatted_truncate_text(content, TruncationPolicy::Tokens(1)), + ); + } + + #[test] + fn truncate_tokens_under_limit_returns_original() { + let content = "example output"; + + assert_eq!( + content, + formatted_truncate_text(content, TruncationPolicy::Tokens(10)), + ); + } + + #[test] + fn truncate_bytes_under_limit_returns_original() { + let content = "example output"; + + assert_eq!( + content, + formatted_truncate_text(content, TruncationPolicy::Bytes(20)), + ); + } + + #[test] + fn truncate_tokens_over_limit_returns_truncated() { + let content = "this is an example of a long output that should be truncated"; + + assert_eq!( + "Total output lines: 1\n\nthis is an…10 tokens truncated… truncated", + formatted_truncate_text(content, TruncationPolicy::Tokens(5)), + ); + } + + #[test] + fn truncate_bytes_over_limit_returns_truncated() { + let content = "this is an example of a long output that should be truncated"; + + assert_eq!( + "Total output lines: 1\n\nthis is an exam…30 chars truncated…ld be truncated", + formatted_truncate_text(content, TruncationPolicy::Bytes(30)), + ); + } + + #[test] + fn truncate_bytes_reports_original_line_count_when_truncated() { + let content = + "this is an example of a long output that should be truncated\nalso some other line"; + + assert_eq!( + "Total output lines: 2\n\nthis is an exam…51 chars truncated…some other line", + formatted_truncate_text(content, TruncationPolicy::Bytes(30)), + ); + } + + #[test] + fn truncate_tokens_reports_original_line_count_when_truncated() { + let content = + "this is an example of a long output that should be truncated\nalso some other line"; + + assert_eq!( + "Total output lines: 2\n\nthis is an example o…11 tokens truncated…also some other line", + formatted_truncate_text(content, TruncationPolicy::Tokens(10)), + ); + } + + #[test] + fn truncate_with_token_budget_returns_original_when_under_limit() { let s = "short output"; let limit = 100; - let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit)); - let (out, original) = truncate_with_token_budget(s, limit, source); + let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(limit)); assert_eq!(out, s); assert_eq!(original, None); } #[test] - fn truncate_middle_reports_truncation_at_zero_limit() { + fn truncate_with_token_budget_reports_truncation_at_zero_limit() { let s = "abcdef"; - let source = TruncationSource::Policy(TruncationPolicy::Tokens(0)); - let (out, original) = truncate_with_token_budget(s, 0, source); - assert_eq!(out, "[…2 tokens truncated…]"); - assert_eq!(original, Some(approx_token_count(s) as u64)); + let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(0)); + assert_eq!(out, "…2 tokens truncated…"); + assert_eq!(original, Some(2)); } #[test] - fn truncate_middle_enforces_token_budget() { - let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa"; - let max_tokens = 12; - let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens)); - let (out, original) = truncate_with_token_budget(s, max_tokens, source); - assert!(out.contains("tokens truncated")); - assert_eq!(original, Some(approx_token_count(s) as u64)); - assert!(out.len() < s.len(), "truncated output should be shorter"); - } - - #[test] - fn truncate_middle_handles_utf8_content() { + fn truncate_middle_tokens_handles_utf8_content() { let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n"; - let max_tokens = 8; - let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens)); - let (out, tokens) = truncate_with_token_budget(s, max_tokens, source); - - assert!(out.contains("tokens truncated")); - assert!(!out.contains('\u{fffd}')); - assert_eq!(tokens, Some(approx_token_count(s) as u64)); - assert!(out.len() < s.len(), "UTF-8 content should be shortened"); + let (out, tokens) = truncate_with_token_budget(s, TruncationPolicy::Tokens(8)); + assert_eq!(out, "😀😀😀😀…8 tokens truncated…"); + assert_eq!(tokens, Some(16)); } #[test] - fn format_exec_output_truncates_large_error() { - let line = "very long execution error line that should trigger truncation\n"; - let large_error = line.repeat(2_500); // way beyond both byte and line limits - - let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes()); - - let total_lines = large_error.lines().count(); - let pattern = truncated_message_pattern(line, total_lines); - let regex = Regex::new(&pattern).unwrap_or_else(|err| { - panic!("failed to compile regex {pattern}: {err}"); - }); - let captures = regex - .captures(&truncated) - .unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {truncated}")); - let body = captures - .name("body") - .expect("missing body capture") - .as_str(); - assert!( - body.len() <= model_format_max_bytes(), - "body exceeds byte limit: {} bytes", - body.len() - ); - assert_ne!(truncated, large_error); - } - - #[test] - fn format_exec_output_marks_byte_truncation_without_omitted_lines() { - let max_bytes = model_format_max_bytes(); - let long_line = "a".repeat(max_bytes + 50); - let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes); - - assert_ne!(truncated, long_line); - let removed_bytes = long_line.len().saturating_sub(max_bytes); - let marker_line = - format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]"); - assert!( - truncated.contains(&marker_line), - "missing byte truncation marker: {truncated}" - ); - assert!( - !truncated.contains("omitted"), - "line omission marker should not appear when no lines were dropped: {truncated}" - ); - } - - #[test] - fn format_exec_output_returns_original_when_within_limits() { - let content = "example output\n".repeat(10); - - assert_eq!( - truncate_with_line_bytes_budget(&content, model_format_max_bytes()), - content - ); - } - - #[test] - fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() { - let total_lines = MODEL_FORMAT_MAX_LINES + 100; - let content: String = (0..total_lines) - .map(|idx| format!("line-{idx}\n")) - .collect(); - - let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes()); - - let omitted = total_lines - MODEL_FORMAT_MAX_LINES; - let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]"); - - assert!( - truncated.contains(&expected_marker), - "missing omitted marker: {truncated}" - ); - assert!( - truncated.contains("line-0\n"), - "expected head line to remain: {truncated}" - ); - - let last_line = format!("line-{}\n", total_lines - 1); - assert!( - truncated.contains(&last_line), - "expected tail line to remain: {truncated}" - ); - } - - #[test] - fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() { - let total_lines = MODEL_FORMAT_MAX_LINES + 42; - let long_line = "x".repeat(256); - let content: String = (0..total_lines) - .map(|idx| format!("line-{idx}-{long_line}\n")) - .collect(); - - let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes()); - - assert!( - truncated.contains("[... omitted 42 of 298 lines ...]"), - "expected omitted marker when line count exceeds limit: {truncated}" - ); - assert!( - !truncated.contains("byte limit"), - "line omission marker should take precedence over byte marker: {truncated}" - ); + fn truncate_middle_bytes_handles_utf8_content() { + let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n"; + let out = truncate_text(s, TruncationPolicy::Bytes(20)); + assert_eq!(out, "😀😀…31 chars truncated…"); } #[test] diff --git a/codex-rs/core/src/unified_exec/session.rs b/codex-rs/core/src/unified_exec/session.rs index 82d6e4137..b37a9cdb5 100644 --- a/codex-rs/core/src/unified_exec/session.rs +++ b/codex-rs/core/src/unified_exec/session.rs @@ -15,7 +15,7 @@ use crate::exec::SandboxType; use crate::exec::StreamOutput; use crate::exec::is_likely_sandbox_denied; use crate::truncate::TruncationPolicy; -use crate::truncate::truncate_text; +use crate::truncate::formatted_truncate_text; use codex_utils_pty::ExecCommandSession; use codex_utils_pty::SpawnedPty; @@ -167,7 +167,7 @@ impl UnifiedExecSession { }; if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) { - let snippet = truncate_text( + let snippet = formatted_truncate_text( &aggregated_text, TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS), ); diff --git a/codex-rs/core/src/unified_exec/session_manager.rs b/codex-rs/core/src/unified_exec/session_manager.rs index 57c60f2b8..f6dce8644 100644 --- a/codex-rs/core/src/unified_exec/session_manager.rs +++ b/codex-rs/core/src/unified_exec/session_manager.rs @@ -25,7 +25,7 @@ use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; use crate::truncate::TruncationPolicy; use crate::truncate::approx_token_count; -use crate::truncate::truncate_text; +use crate::truncate::formatted_truncate_text; use super::ExecCommandRequest; use super::SessionEntry; @@ -72,7 +72,7 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); + let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let chunk_id = generate_chunk_id(); let has_exited = session.has_exited(); let stored_id = self @@ -179,7 +179,7 @@ impl UnifiedExecSessionManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); + let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let original_token_count = approx_token_count(&text); let chunk_id = generate_chunk_id(); diff --git a/codex-rs/core/src/user_shell_command.rs b/codex-rs/core/src/user_shell_command.rs index 7f0731c96..857e01c06 100644 --- a/codex-rs/core/src/user_shell_command.rs +++ b/codex-rs/core/src/user_shell_command.rs @@ -3,6 +3,7 @@ use std::time::Duration; use codex_protocol::models::ContentItem; use codex_protocol::models::ResponseItem; +use crate::codex::TurnContext; use crate::exec::ExecToolCallOutput; use crate::tools::format_exec_output_str; @@ -20,7 +21,11 @@ fn format_duration_line(duration: Duration) -> String { format!("Duration: {duration_seconds:.4} seconds") } -fn format_user_shell_command_body(command: &str, exec_output: &ExecToolCallOutput) -> String { +fn format_user_shell_command_body( + command: &str, + exec_output: &ExecToolCallOutput, + turn_context: &TurnContext, +) -> String { let mut sections = Vec::new(); sections.push("".to_string()); sections.push(command.to_string()); @@ -29,25 +34,33 @@ fn format_user_shell_command_body(command: &str, exec_output: &ExecToolCallOutpu sections.push(format!("Exit code: {}", exec_output.exit_code)); sections.push(format_duration_line(exec_output.duration)); sections.push("Output:".to_string()); - sections.push(format_exec_output_str(exec_output)); + sections.push(format_exec_output_str( + exec_output, + turn_context.truncation_policy, + )); sections.push("".to_string()); sections.join("\n") } -pub fn format_user_shell_command_record(command: &str, exec_output: &ExecToolCallOutput) -> String { - let body = format_user_shell_command_body(command, exec_output); +pub fn format_user_shell_command_record( + command: &str, + exec_output: &ExecToolCallOutput, + turn_context: &TurnContext, +) -> String { + let body = format_user_shell_command_body(command, exec_output, turn_context); format!("{USER_SHELL_COMMAND_OPEN}\n{body}\n{USER_SHELL_COMMAND_CLOSE}") } pub fn user_shell_command_record_item( command: &str, exec_output: &ExecToolCallOutput, + turn_context: &TurnContext, ) -> ResponseItem { ResponseItem::Message { id: None, role: "user".to_string(), content: vec![ContentItem::InputText { - text: format_user_shell_command_record(command, exec_output), + text: format_user_shell_command_record(command, exec_output, turn_context), }], } } @@ -55,6 +68,7 @@ pub fn user_shell_command_record_item( #[cfg(test)] mod tests { use super::*; + use crate::codex::make_session_and_context; use crate::exec::StreamOutput; use pretty_assertions::assert_eq; @@ -76,7 +90,8 @@ mod tests { duration: Duration::from_secs(1), timed_out: false, }; - let item = user_shell_command_record_item("echo hi", &exec_output); + let (_, turn_context) = make_session_and_context(); + let item = user_shell_command_record_item("echo hi", &exec_output, &turn_context); let ResponseItem::Message { content, .. } = item else { panic!("expected message"); }; @@ -99,7 +114,8 @@ mod tests { duration: Duration::from_millis(120), timed_out: false, }; - let record = format_user_shell_command_record("false", &exec_output); + let (_, turn_context) = make_session_and_context(); + let record = format_user_shell_command_record("false", &exec_output, &turn_context); assert_eq!( record, "\n\nfalse\n\n\nExit code: 42\nDuration: 0.1200 seconds\nOutput:\ncombined output wins\n\n" diff --git a/codex-rs/core/tests/suite/shell_serialization.rs b/codex-rs/core/tests/suite/shell_serialization.rs index aec24ccf4..7e15d2625 100644 --- a/codex-rs/core/tests/suite/shell_serialization.rs +++ b/codex-rs/core/tests/suite/shell_serialization.rs @@ -407,7 +407,6 @@ $"#; #[tokio::test(flavor = "multi_thread", worker_threads = 2)] #[test_case(ShellModelOutput::Shell)] -#[test_case(ShellModelOutput::ShellCommand)] #[test_case(ShellModelOutput::LocalShell)] async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> { skip_if_no_network!(Ok(())); @@ -417,6 +416,7 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp config.model = "gpt-5.1-codex".to_string(); config.model_family = find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); + config.tool_output_token_limit = Some(200); if matches!(output_type, ShellModelOutput::ShellCommand) { config.features.enable(Feature::ShellCommandTool); } @@ -457,10 +457,7 @@ Output: 4 5 6 -.* -\[\.{3} omitted \d+ of 400 lines \.{3}\] - -.* +.*…45 tokens truncated….* 396 397 398 @@ -858,7 +855,7 @@ async fn shell_command_output_is_not_truncated_under_10k_bytes() -> Result<()> { let expected_pattern = r"(?s)^Exit code: 0 Wall time: [0-9]+(?:\.[0-9]+)? seconds Output: -1{5000}$"; // TODO: this is very wrong!!! +1{10000}$"; assert_regex_match(expected_pattern, output); Ok(()) @@ -911,12 +908,8 @@ async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> { let expected_pattern = r"(?s)^Exit code: 0 Wall time: [0-9]+(?:\.[0-9]+)? seconds -Total output lines: 1 Output: -1* -\[... removed 1 bytes to fit 10000 byte limit ...\] - -$"; +1*…1 chars truncated…1*$"; assert_regex_match(expected_pattern, output); Ok(()) diff --git a/codex-rs/core/tests/suite/truncation.rs b/codex-rs/core/tests/suite/truncation.rs index 3cbbc6bd5..b05acf538 100644 --- a/codex-rs/core/tests/suite/truncation.rs +++ b/codex-rs/core/tests/suite/truncation.rs @@ -101,17 +101,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> { // Verifies that a standard tool call (shell) exceeding the model formatting // limits is truncated before being sent back to the model. #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> { +async fn tool_call_output_configured_limit_chars_type() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; // Use a model that exposes the generic shell tool. - let mut builder = test_codex().with_config(|config| { - config.model = "gpt-5.1-codex".to_string(); - config.model_family = - find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); + let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| { + config.tool_output_token_limit = Some(100_000); }); + let fixture = builder.build(&server).await?; let call_id = "shell-too-large"; @@ -120,13 +119,13 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> { "command": [ "powershell", "-Command", - "for ($i=1; $i -le 400; $i++) { Write-Output $i }" + "for ($i=1; $i -le 100000; $i++) { Write-Output $i }" ], "timeout_ms": 5_000, }) } else { serde_json::json!({ - "command": ["/bin/sh", "-c", "seq 1 400"], + "command": ["/bin/sh", "-c", "seq 1 100000"], "timeout_ms": 5_000, }) }; @@ -162,14 +161,169 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> { .context("function_call_output present for shell call")?; let output = output.replace("\r\n", "\n"); - // Expect plain text (not JSON) with truncation markers and line elision. + // Expect plain text (not JSON) containing the entire shell output. + assert!( + serde_json::from_str::(&output).is_err(), + "expected truncated shell output to be plain text" + ); + + assert_eq!(output.len(), 400094, "we should be almost 100k tokens"); + + assert!( + !output.contains("tokens truncated"), + "shell output should not contain tokens truncated marker: {output}" + ); + + Ok(()) +} + +// Verifies that a standard tool call (shell) exceeding the model formatting +// limits is truncated before being sent back to the model. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + + // Use a model that exposes the generic shell tool. + let mut builder = test_codex().with_model("gpt-5.1"); + + let fixture = builder.build(&server).await?; + + let call_id = "shell-too-large"; + let args = if cfg!(windows) { + serde_json::json!({ + "command": [ + "powershell", + "-Command", + "for ($i=1; $i -le 100000; $i++) { Write-Output $i }" + ], + "timeout_ms": 5_000, + }) + } else { + serde_json::json!({ + "command": ["/bin/sh", "-c", "seq 1 100000"], + "timeout_ms": 5_000, + }) + }; + + // First response: model tells us to run the tool; second: complete the turn. + mount_sse_once( + &server, + sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let mock2 = mount_sse_once( + &server, + sse(vec![ + responses::ev_assistant_message("msg-1", "done"), + responses::ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess) + .await?; + + // Inspect what we sent back to the model; it should contain a truncated + // function_call_output for the shell call. + let output = mock2 + .single_request() + .function_call_output_text(call_id) + .context("function_call_output present for shell call")?; + let output = output.replace("\r\n", "\n"); + + // Expect plain text (not JSON) containing the entire shell output. + assert!( + serde_json::from_str::(&output).is_err(), + "expected truncated shell output to be plain text" + ); + + assert_eq!(output.len(), 9976); // ~10k characters + let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\n.*?…578898 chars truncated….*$"#; + + assert_regex_match(truncated_pattern, &output); + + Ok(()) +} + +// Verifies that a standard tool call (shell) exceeding the model formatting +// limits is truncated before being sent back to the model. +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + + // Use a model that exposes the generic shell tool. + let mut builder = test_codex().with_config(|config| { + config.model = "gpt-5.1-codex".to_string(); + config.model_family = + find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family"); + }); + let fixture = builder.build(&server).await?; + + let call_id = "shell-too-large"; + let args = if cfg!(windows) { + serde_json::json!({ + "command": [ + "powershell", + "-Command", + "for ($i=1; $i -le 100000; $i++) { Write-Output $i }" + ], + "timeout_ms": 5_000, + }) + } else { + serde_json::json!({ + "command": ["/bin/sh", "-c", "seq 1 100000"], + "timeout_ms": 5_000, + }) + }; + + // First response: model tells us to run the tool; second: complete the turn. + mount_sse_once( + &server, + sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let mock2 = mount_sse_once( + &server, + sse(vec![ + responses::ev_assistant_message("msg-1", "done"), + responses::ev_completed("resp-2"), + ]), + ) + .await; + + fixture + .submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess) + .await?; + + // Inspect what we sent back to the model; it should contain a truncated + // function_call_output for the shell call. + let output = mock2 + .single_request() + .function_call_output_text(call_id) + .context("function_call_output present for shell call")?; + let output = output.replace("\r\n", "\n"); + + // Expect plain text (not JSON) containing the entire shell output. assert!( serde_json::from_str::(&output).is_err(), "expected truncated shell output to be plain text" ); let truncated_pattern = r#"(?s)^Exit code: 0 -Wall time: .* seconds -Total output lines: 400 +Wall time: [0-9]+(?:\.[0-9]+)? seconds +Total output lines: 100000 Output: 1 2 @@ -177,15 +331,9 @@ Output: 4 5 6 -.* -\[\.{3} omitted 144 of 400 lines \.{3}\] - -.* -396 -397 -398 -399 -400 +.*…137224 tokens truncated.* +99999 +100000 $"#; assert_regex_match(truncated_pattern, &output); @@ -211,13 +359,13 @@ async fn tool_call_output_truncated_only_once() -> Result<()> { "command": [ "powershell", "-Command", - "for ($i=1; $i -le 2000; $i++) { Write-Output $i }" + "for ($i=1; $i -le 10000; $i++) { Write-Output $i }" ], "timeout_ms": 5_000, }) } else { serde_json::json!({ - "command": ["/bin/sh", "-c", "seq 1 2000"], + "command": ["/bin/sh", "-c", "seq 1 10000"], "timeout_ms": 5_000, }) }; @@ -249,11 +397,11 @@ async fn tool_call_output_truncated_only_once() -> Result<()> { .function_call_output_text(call_id) .context("function_call_output present for shell call")?; - let total_line_headers = output.matches("Total output lines:").count(); + let truncation_markers = output.matches("tokens truncated").count(); assert_eq!( - total_line_headers, 1, - "shell output should carry only one truncation header: {output}" + truncation_markers, 1, + "shell output should carry only one truncation marker: {output}" ); Ok(()) @@ -321,6 +469,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> disabled_tools: None, }, ); + config.tool_output_token_limit = Some(500); }); let fixture = builder.build(&server).await?; @@ -337,12 +486,6 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> .function_call_output_text(call_id) .context("function_call_output present for rmcp call")?; - // Expect plain text with token-based truncation marker; the original JSON body - // is truncated in the middle of the echo string. - assert!( - serde_json::from_str::(&output).is_err(), - "expected truncated MCP output to be plain text" - ); assert!( !output.contains("Total output lines:"), "MCP output should not include line-based truncation header: {output}" @@ -350,6 +493,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#; assert_regex_match(truncated_pattern, &output); + assert!(output.len() < 2500, "{}", output.len()); Ok(()) } @@ -501,7 +645,9 @@ async fn token_policy_marker_reports_tokens() -> Result<()> { .function_call_output_text(call_id) .context("shell output present")?; - assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output); + let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5\\n.*?…\d+ tokens truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#; + + assert_regex_match(pattern, &output); Ok(()) } @@ -552,14 +698,16 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> { .function_call_output_text(call_id) .context("shell output present")?; - assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output); + let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5.*?…\d+ chars truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#; + + assert_regex_match(pattern, &output); Ok(()) } -// Overriding config with a large token budget should avoid truncation. +// Shell tool output should remain intact when the config opts into a large token budget. #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn large_budget_avoids_truncation() -> Result<()> { +async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; @@ -576,6 +724,7 @@ async fn large_budget_avoids_truncation() -> Result<()> { "command": ["/bin/sh", "-c", "seq 1 1000"], "timeout_ms": 5_000, }); + let expected_body: String = (1..=1000).map(|i| format!("{i}\n")).collect(); mount_sse_once( &server, @@ -607,6 +756,10 @@ async fn large_budget_avoids_truncation() -> Result<()> { .function_call_output_text(call_id) .context("shell output present")?; + assert!( + output.ends_with(&expected_body), + "expected entire shell output when budget increased: {output}" + ); assert!( !output.contains("truncated"), "output should remain untruncated with ample budget" @@ -614,3 +767,101 @@ async fn large_budget_avoids_truncation() -> Result<()> { Ok(()) } + +// MCP server output should also remain intact when the config increases the token limit. +#[tokio::test(flavor = "multi_thread", worker_threads = 1)] +async fn mcp_tool_call_output_not_truncated_with_custom_limit() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + + let call_id = "rmcp-untruncated"; + let server_name = "rmcp"; + let tool_name = format!("mcp__{server_name}__echo"); + let large_msg = "a".repeat(80_000); + let args_json = serde_json::json!({ "message": large_msg }); + + mount_sse_once( + &server, + sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, &tool_name, &args_json.to_string()), + responses::ev_completed("resp-1"), + ]), + ) + .await; + let mock2 = mount_sse_once( + &server, + sse(vec![ + responses::ev_assistant_message("msg-1", "rmcp echo tool completed."), + responses::ev_completed("resp-2"), + ]), + ) + .await; + + let rmcp_test_server_bin = CargoBuild::new() + .package("codex-rmcp-client") + .bin("test_stdio_server") + .run()? + .path() + .to_string_lossy() + .into_owned(); + + let mut builder = test_codex().with_config(move |config| { + config.features.enable(Feature::RmcpClient); + config.tool_output_token_limit = Some(50_000); + config.mcp_servers.insert( + server_name.to_string(), + codex_core::config::types::McpServerConfig { + transport: codex_core::config::types::McpServerTransportConfig::Stdio { + command: rmcp_test_server_bin, + args: Vec::new(), + env: None, + env_vars: Vec::new(), + cwd: None, + }, + enabled: true, + startup_timeout_sec: Some(std::time::Duration::from_secs(10)), + tool_timeout_sec: None, + enabled_tools: None, + disabled_tools: None, + }, + ); + }); + let fixture = builder.build(&server).await?; + + fixture + .submit_turn_with_policy( + "call the rmcp echo tool with a very large message", + SandboxPolicy::ReadOnly, + ) + .await?; + + let output = mock2 + .single_request() + .function_call_output_text(call_id) + .context("function_call_output present for rmcp call")?; + + let parsed: Value = serde_json::from_str(&output)?; + assert_eq!( + output.len(), + 80031, + "parsed MCP output should retain its serialized length" + ); + let expected_echo = format!("ECHOING: {large_msg}"); + let echo_str = parsed["echo"] + .as_str() + .context("echo field should be a string in rmcp echo output")?; + assert_eq!( + echo_str.len(), + expected_echo.len(), + "echo length should match" + ); + assert_eq!(echo_str, expected_echo); + assert!( + !output.contains("truncated"), + "output should not include truncation markers when limit is raised: {output}" + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/unified_exec.rs b/codex-rs/core/tests/suite/unified_exec.rs index 062f48dbb..bf926ef96 100644 --- a/codex-rs/core/tests/suite/unified_exec.rs +++ b/codex-rs/core/tests/suite/unified_exec.rs @@ -1586,8 +1586,7 @@ PY let large_output = outputs.get(call_id).expect("missing large output summary"); let output_text = large_output.output.replace("\r\n", "\n"); - let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}"#; - + let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated…(token token \n){5,}$"; assert_regex_match(truncated_pattern, &output_text); let original_tokens = large_output diff --git a/codex-rs/core/tests/suite/user_shell_cmd.rs b/codex-rs/core/tests/suite/user_shell_cmd.rs index 0e9585ba4..635813d7a 100644 --- a/codex-rs/core/tests/suite/user_shell_cmd.rs +++ b/codex-rs/core/tests/suite/user_shell_cmd.rs @@ -207,10 +207,16 @@ async fn user_shell_command_history_is_persisted_and_shared_with_model() -> anyh } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] +#[cfg(not(target_os = "windows"))] // TODO: unignore on windows async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<()> { let server = responses::start_mock_server().await; - let mut builder = core_test_support::test_codex::test_codex(); - let test = builder.build(&server).await?; + let builder = core_test_support::test_codex::test_codex(); + let test = builder + .with_config(|config| { + config.tool_output_token_limit = Some(100); + }) + .build(&server) + .await?; #[cfg(windows)] let command = r#"for ($i=1; $i -le 400; $i++) { Write-Output $i }"#.to_string(); @@ -249,10 +255,9 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<( .expect("command message recorded in request"); let command_message = command_message.replace("\r\n", "\n"); - let head = (1..=128).map(|i| format!("{i}\n")).collect::(); - let tail = (273..=400).map(|i| format!("{i}\n")).collect::(); - let truncated_body = - format!("Total output lines: 400\n\n{head}\n[... omitted 144 of 400 lines ...]\n\n{tail}"); + let head = (1..=69).map(|i| format!("{i}\n")).collect::(); + let tail = (352..=400).map(|i| format!("{i}\n")).collect::(); + let truncated_body = format!("Total output lines: 400\n\n{head}…273 tokens truncated…{tail}"); let escaped_command = escape(&command); let escaped_truncated_body = escape(&truncated_body); let expected_pattern = format!( @@ -270,6 +275,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> { let server = start_mock_server().await; let mut builder = test_codex().with_config(|config| { + config.tool_output_token_limit = Some(100); config.model = "gpt-5.1-codex".to_string(); config.model_family = find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");