Move shell to use truncate_text (#6842)

Move shell to use the configurable `truncate_text`

---------

Co-authored-by: pakrym-oai <pakrym@openai.com>
This commit is contained in:
Ahmed Ibrahim 2025-11-19 01:56:08 -08:00 committed by GitHub
parent 75f38f16dd
commit efebc62fb7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 583 additions and 498 deletions

View file

@ -165,11 +165,9 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
));
let mut output = parsed.output.clone();
if let Some(total_lines) = extract_total_output_lines(&parsed.output) {
if let Some((stripped, total_lines)) = strip_total_output_header(&parsed.output) {
sections.push(format!("Total output lines: {total_lines}"));
if let Some(stripped) = strip_total_output_header(&output) {
output = stripped.to_string();
}
output = stripped.to_string();
}
sections.push("Output:".to_string());
@ -178,19 +176,12 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
sections.join("\n")
}
fn extract_total_output_lines(output: &str) -> Option<u32> {
let marker_start = output.find("[... omitted ")?;
let marker = &output[marker_start..];
let (_, after_of) = marker.split_once(" of ")?;
let (total_segment, _) = after_of.split_once(' ')?;
total_segment.parse::<u32>().ok()
}
fn strip_total_output_header(output: &str) -> Option<&str> {
fn strip_total_output_header(output: &str) -> Option<(&str, u32)> {
let after_prefix = output.strip_prefix("Total output lines: ")?;
let (_, remainder) = after_prefix.split_once('\n')?;
let (total_segment, remainder) = after_prefix.split_once('\n')?;
let total_lines = total_segment.parse::<u32>().ok()?;
let remainder = remainder.strip_prefix('\n').unwrap_or(remainder);
Some(remainder)
Some((remainder, total_lines))
}
#[derive(Debug)]

View file

@ -2479,8 +2479,9 @@ mod tests {
duration: StdDuration::from_secs(1),
timed_out: true,
};
let (_, turn_context) = make_session_and_context();
let out = format_exec_output_str(&exec);
let out = format_exec_output_str(&exec, turn_context.truncation_policy);
assert_eq!(
out,

View file

@ -145,13 +145,17 @@ impl ContextManager {
}
fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem {
let policy_with_serialization_budget = policy.mul(1.2);
match item {
ResponseItem::FunctionCallOutput { call_id, output } => {
let truncated = truncate_text(output.content.as_str(), policy);
let truncated_items = output
.content_items
.as_ref()
.map(|items| truncate_function_output_items_with_policy(items, policy));
let truncated =
truncate_text(output.content.as_str(), policy_with_serialization_budget);
let truncated_items = output.content_items.as_ref().map(|items| {
truncate_function_output_items_with_policy(
items,
policy_with_serialization_budget,
)
});
ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: FunctionCallOutputPayload {
@ -162,7 +166,7 @@ impl ContextManager {
}
}
ResponseItem::CustomToolCallOutput { call_id, output } => {
let truncated = truncate_text(output, policy);
let truncated = truncate_text(output, policy_with_serialization_budget);
ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: truncated,

View file

@ -12,8 +12,8 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
const EXEC_FORMAT_MAX_LINES: usize = 256;
const EXEC_FORMAT_MAX_BYTES: usize = 10_000;
const EXEC_FORMAT_MAX_TOKENS: usize = 2_500;
fn assistant_msg(text: &str) -> ResponseItem {
ResponseItem::Message {
@ -56,6 +56,10 @@ fn reasoning_msg(text: &str) -> ResponseItem {
}
}
fn truncate_exec_output(content: &str) -> String {
truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
}
#[test]
fn filters_non_api_messages() {
let mut h = ContextManager::default();
@ -228,7 +232,7 @@ fn normalization_retains_local_shell_outputs() {
ResponseItem::FunctionCallOutput {
call_id: "shell-1".to_string(),
output: FunctionCallOutputPayload {
content: "ok".to_string(),
content: "Total output lines: 1\n\nok".to_string(),
..Default::default()
},
},
@ -330,8 +334,8 @@ fn record_items_respects_custom_token_limit() {
assert!(stored.content.contains("tokens truncated"));
}
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
let pattern = truncated_message_pattern(line, total_lines);
fn assert_truncated_message_matches(message: &str, line: &str, expected_removed: usize) {
let pattern = truncated_message_pattern(line);
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
panic!("failed to compile regex {pattern}: {err}");
});
@ -347,23 +351,18 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz
"body exceeds byte limit: {} bytes",
body.len()
);
let removed: usize = captures
.name("removed")
.expect("missing removed capture")
.as_str()
.parse()
.unwrap_or_else(|err| panic!("invalid removed tokens: {err}"));
assert_eq!(removed, expected_removed, "mismatched removed token count");
}
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
let head_lines = EXEC_FORMAT_MAX_LINES / 2;
let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines;
let head_take = head_lines.min(total_lines);
let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
let omitted = total_lines.saturating_sub(head_take + tail_take);
fn truncated_message_pattern(line: &str) -> String {
let escaped_line = regex_lite::escape(line);
if omitted == 0 {
return format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$",
);
}
format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
)
format!(r"(?s)^(?P<body>{escaped_line}.*?)(?:\r?)?…(?P<removed>\d+) tokens truncated…(?:.*)?$")
}
#[test]
@ -371,27 +370,18 @@ fn format_exec_output_truncates_large_error() {
let line = "very long execution error line that should trigger truncation\n";
let large_error = line.repeat(2_500); // way beyond both byte and line limits
let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES);
let truncated = truncate_exec_output(&large_error);
let total_lines = large_error.lines().count();
assert_truncated_message_matches(&truncated, line, total_lines);
assert_truncated_message_matches(&truncated, line, 36250);
assert_ne!(truncated, large_error);
}
#[test]
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50);
let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES);
let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 10000);
let truncated = truncate_exec_output(&long_line);
assert_ne!(truncated, long_line);
let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES);
let marker_line = format!(
"[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]"
);
assert!(
truncated.contains(&marker_line),
"missing byte truncation marker: {truncated}"
);
assert_truncated_message_matches(&truncated, "a", 2500);
assert!(
!truncated.contains("omitted"),
"line omission marker should not appear when no lines were dropped: {truncated}"
@ -401,34 +391,25 @@ fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
#[test]
fn format_exec_output_returns_original_when_within_limits() {
let content = "example output\n".repeat(10);
assert_eq!(
truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES),
content
);
assert_eq!(truncate_exec_output(&content), content);
}
#[test]
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
let total_lines = EXEC_FORMAT_MAX_LINES + 100;
let total_lines = 2_000;
let filler = "x".repeat(64);
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}\n"))
.map(|idx| format!("line-{idx}-{filler}\n"))
.collect();
let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
let omitted = total_lines - EXEC_FORMAT_MAX_LINES;
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
let truncated = truncate_exec_output(&content);
assert_truncated_message_matches(&truncated, "line-0-", 34_723);
assert!(
truncated.contains(&expected_marker),
"missing omitted marker: {truncated}"
);
assert!(
truncated.contains("line-0\n"),
truncated.contains("line-0-"),
"expected head line to remain: {truncated}"
);
let last_line = format!("line-{}\n", total_lines - 1);
let last_line = format!("line-{}-", total_lines - 1);
assert!(
truncated.contains(&last_line),
"expected tail line to remain: {truncated}"
@ -437,22 +418,15 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
#[test]
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
let total_lines = EXEC_FORMAT_MAX_LINES + 42;
let total_lines = 300;
let long_line = "x".repeat(256);
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}-{long_line}\n"))
.collect();
let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
let truncated = truncate_exec_output(&content);
assert!(
truncated.contains("[... omitted 42 of 298 lines ...]"),
"expected omitted marker when line count exceeds limit: {truncated}"
);
assert!(
!truncated.contains("byte limit"),
"line omission marker should take precedence over byte marker: {truncated}"
);
assert_truncated_message_matches(&truncated, "line-0-", 17_423);
}
//TODO(aibrahim): run CI in release mode.

View file

@ -1,5 +1,4 @@
mod history;
mod normalize;
pub(crate) use crate::truncate::truncate_with_line_bytes_budget;
pub(crate) use history::ContextManager;

View file

@ -122,7 +122,11 @@ impl SessionTask for UserShellCommandTask {
duration: Duration::ZERO,
timed_out: false,
};
let output_items = [user_shell_command_record_item(&raw_command, &exec_output)];
let output_items = [user_shell_command_record_item(
&raw_command,
&exec_output,
&turn_context,
)];
session
.record_conversation_items(turn_context.as_ref(), &output_items)
.await;
@ -164,12 +168,19 @@ impl SessionTask for UserShellCommandTask {
aggregated_output: output.aggregated_output.text.clone(),
exit_code: output.exit_code,
duration: output.duration,
formatted_output: format_exec_output_str(&output),
formatted_output: format_exec_output_str(
&output,
turn_context.truncation_policy,
),
}),
)
.await;
let output_items = [user_shell_command_record_item(&raw_command, &output)];
let output_items = [user_shell_command_record_item(
&raw_command,
&output,
&turn_context,
)];
session
.record_conversation_items(turn_context.as_ref(), &output_items)
.await;
@ -201,11 +212,18 @@ impl SessionTask for UserShellCommandTask {
aggregated_output: exec_output.aggregated_output.text.clone(),
exit_code: exec_output.exit_code,
duration: exec_output.duration,
formatted_output: format_exec_output_str(&exec_output),
formatted_output: format_exec_output_str(
&exec_output,
turn_context.truncation_policy,
),
}),
)
.await;
let output_items = [user_shell_command_record_item(&raw_command, &exec_output)];
let output_items = [user_shell_command_record_item(
&raw_command,
&exec_output,
&turn_context,
)];
session
.record_conversation_items(turn_context.as_ref(), &output_items)
.await;

View file

@ -242,12 +242,16 @@ impl ToolEmitter {
self.emit(ctx, ToolEventStage::Begin).await;
}
fn format_exec_output_for_model(&self, output: &ExecToolCallOutput) -> String {
fn format_exec_output_for_model(
&self,
output: &ExecToolCallOutput,
ctx: ToolEventCtx<'_>,
) -> String {
match self {
Self::Shell { freeform: true, .. } => {
super::format_exec_output_for_model_freeform(output)
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
}
_ => super::format_exec_output_for_model_structured(output),
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
}
}
@ -258,7 +262,7 @@ impl ToolEmitter {
) -> Result<String, FunctionCallError> {
let (event, result) = match out {
Ok(output) => {
let content = self.format_exec_output_for_model(&output);
let content = self.format_exec_output_for_model(&output, ctx);
let exit_code = output.exit_code;
let event = ToolEventStage::Success(output);
let result = if exit_code == 0 {
@ -270,7 +274,7 @@ impl ToolEmitter {
}
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
let response = self.format_exec_output_for_model(&output);
let response = self.format_exec_output_for_model(&output, ctx);
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
let result = Err(FunctionCallError::RespondToModel(response));
(event, result)
@ -359,7 +363,7 @@ async fn emit_exec_stage(
aggregated_output: output.aggregated_output.text.clone(),
exit_code: output.exit_code,
duration: output.duration,
formatted_output: format_exec_output_str(&output),
formatted_output: format_exec_output_str(&output, ctx.turn.truncation_policy),
};
emit_exec_end(ctx, exec_input, exec_result).await;
}

View file

@ -9,9 +9,10 @@ pub mod runtimes;
pub mod sandboxing;
pub mod spec;
use crate::context_manager::truncate_with_line_bytes_budget;
use crate::exec::ExecToolCallOutput;
use crate::truncate::truncate_formatted_exec_output;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use crate::truncate::truncate_text;
pub use router::ToolRouter;
use serde::Serialize;
@ -21,12 +22,12 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
"[... telemetry preview truncated ...]";
// TODO(aibrahim): migrate shell tool to use truncate text and respect config value
const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;
/// Format the combined exec output for sending back to the model.
/// Includes exit code and duration metadata; truncates large bodies safely.
pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) -> String {
pub fn format_exec_output_for_model_structured(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
let ExecToolCallOutput {
exit_code,
duration,
@ -48,7 +49,7 @@ pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput)
// round to 1 decimal place
let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
let formatted_output = format_exec_output_str(exec_output);
let formatted_output = format_exec_output_str(exec_output, truncation_policy);
let payload = ExecOutput {
output: &formatted_output,
@ -62,18 +63,16 @@ pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput)
serde_json::to_string(&payload).expect("serialize ExecOutput")
}
pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -> String {
pub fn format_exec_output_for_model_freeform(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
// round to 1 decimal place
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;
let total_lines = exec_output.aggregated_output.text.lines().count();
let formatted_output = truncate_formatted_exec_output(
&exec_output.aggregated_output.text,
total_lines,
SHELL_OUTPUT_MAX_BYTES,
256, // TODO: to be removed
);
let formatted_output = truncate_text(&exec_output.aggregated_output.text, truncation_policy);
let mut sections = Vec::new();
@ -89,7 +88,10 @@ pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -
sections.join("\n")
}
pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
pub fn format_exec_output_str(
exec_output: &ExecToolCallOutput,
truncation_policy: TruncationPolicy,
) -> String {
let ExecToolCallOutput {
aggregated_output, ..
} = exec_output;
@ -106,5 +108,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
};
// Truncate for model consumption before serialization.
truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES)
formatted_truncate_text(&body, truncation_policy)
}

View file

@ -2,11 +2,8 @@
//! and suffix on UTF-8 boundaries, and helpers for line/tokenbased truncation
//! used across the core crate.
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_utils_string::take_bytes_at_char_boundary;
use codex_utils_string::take_last_bytes_at_char_boundary;
use crate::config::Config;
use codex_protocol::models::FunctionCallOutputContentItem;
const APPROX_BYTES_PER_TOKEN: usize = 4;
@ -17,6 +14,18 @@ pub enum TruncationPolicy {
}
impl TruncationPolicy {
/// Scale the underlying budget by `multiplier`, rounding up to avoid under-budgeting.
pub fn mul(self, multiplier: f64) -> Self {
match self {
TruncationPolicy::Bytes(bytes) => {
TruncationPolicy::Bytes((bytes as f64 * multiplier).ceil() as usize)
}
TruncationPolicy::Tokens(tokens) => {
TruncationPolicy::Tokens((tokens as f64 * multiplier).ceil() as usize)
}
}
}
pub fn new(config: &Config) -> Self {
let config_token_limit = config.tool_output_token_limit;
@ -65,34 +74,20 @@ impl TruncationPolicy {
}
}
/// Format a block of exec/tool output for model consumption, truncating by
/// lines and bytes while preserving head and tail segments.
pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String {
// TODO(aibrahim): to be removed
let lines_budget = 256;
// Head+tail truncation for the model: show the beginning and end with an elision.
// Clients still receive full streams; only this formatted summary is capped.
let total_lines = content.lines().count();
if content.len() <= bytes_budget && total_lines <= lines_budget {
pub(crate) fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String {
if content.len() <= policy.byte_budget() {
return content.to_string();
}
let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget);
format!("Total output lines: {total_lines}\n\n{output}")
let total_lines = content.lines().count();
let result = truncate_text(content, policy);
format!("Total output lines: {total_lines}\n\n{result}")
}
pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
match policy {
TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(
content,
bytes,
TruncationSource::Policy(TruncationPolicy::Bytes(bytes)),
),
TruncationPolicy::Tokens(tokens) => {
let (truncated, _) = truncate_with_token_budget(
content,
tokens,
TruncationSource::Policy(TruncationPolicy::Tokens(tokens)),
);
TruncationPolicy::Bytes(_) => truncate_with_byte_estimate(content, policy),
TruncationPolicy::Tokens(_) => {
let (truncated, _) = truncate_with_token_budget(content, policy);
truncated
}
}
@ -162,24 +157,18 @@ pub(crate) fn truncate_function_output_items_with_policy(
/// preserving the beginning and the end. Returns the possibly truncated string
/// and `Some(original_token_count)` if truncation occurred; otherwise returns
/// the original string and `None`.
fn truncate_with_token_budget(
s: &str,
max_tokens: usize,
source: TruncationSource,
) -> (String, Option<u64>) {
fn truncate_with_token_budget(s: &str, policy: TruncationPolicy) -> (String, Option<u64>) {
if s.is_empty() {
return (String::new(), None);
}
let max_tokens = policy.token_budget();
let byte_len = s.len();
if max_tokens > 0 {
let small_threshold = approx_bytes_for_tokens(max_tokens / 4);
if small_threshold > 0 && byte_len <= small_threshold {
return (s.to_string(), None);
}
if max_tokens > 0 && byte_len <= approx_bytes_for_tokens(max_tokens) {
return (s.to_string(), None);
}
let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source);
let truncated = truncate_with_byte_estimate(s, policy);
let approx_total_usize = approx_token_count(s);
let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX);
if truncated == s {
@ -192,14 +181,19 @@ fn truncate_with_token_budget(
/// Truncate a string using a byte budget derived from the token budget, without
/// performing any real tokenization. This keeps the logic purely byte-based and
/// uses a bytes placeholder in the truncated output.
fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String {
fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
if s.is_empty() {
return String::new();
}
let total_chars = s.chars().count();
let max_bytes = policy.byte_budget();
if max_bytes == 0 {
// No budget to show content; just report that everything was truncated.
let marker = format_truncation_marker(source, removed_units_for_source(source, s.len()));
let marker = format_truncation_marker(
policy,
removed_units_for_source(policy, s.len(), total_chars),
);
return marker;
}
@ -208,127 +202,32 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour
}
let total_bytes = s.len();
let removed_bytes = total_bytes.saturating_sub(max_bytes);
let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes));
let marker_len = marker.len();
if marker_len >= max_bytes {
let truncated_marker = truncate_on_boundary(&marker, max_bytes);
return truncated_marker.to_string();
}
let keep_budget = max_bytes - marker_len;
let (left_budget, right_budget) = split_budget(keep_budget);
let (left_budget, right_budget) = split_budget(max_bytes);
let prefix_end = pick_prefix_end(s, left_budget);
let mut suffix_start = pick_suffix_start(s, right_budget);
if suffix_start < prefix_end {
suffix_start = prefix_end;
}
let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker);
let left_chars = s[..prefix_end].chars().count();
let right_chars = s[suffix_start..].chars().count();
let removed_chars = total_chars
.saturating_sub(left_chars)
.saturating_sub(right_chars);
if out.len() > max_bytes {
let boundary = truncate_on_boundary(&out, max_bytes);
out.truncate(boundary.len());
}
let marker = format_truncation_marker(
policy,
removed_units_for_source(policy, total_bytes.saturating_sub(max_bytes), removed_chars),
);
out
assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker)
}
pub(crate) fn truncate_formatted_exec_output(
content: &str,
total_lines: usize,
limit_bytes: usize,
limit_lines: usize,
) -> String {
error_on_double_truncation(content);
let head_lines: usize = limit_lines / 2;
let tail_lines: usize = limit_lines - head_lines; // 128
let head_bytes: usize = limit_bytes / 2;
let segments: Vec<&str> = content.split_inclusive('\n').collect();
let head_take = head_lines.min(segments.len());
let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
let omitted = segments.len().saturating_sub(head_take + tail_take);
let head_slice_end: usize = segments
.iter()
.take(head_take)
.map(|segment| segment.len())
.sum();
let tail_slice_start: usize = if tail_take == 0 {
content.len()
} else {
content.len()
- segments
.iter()
.rev()
.take(tail_take)
.map(|segment| segment.len())
.sum::<usize>()
};
let head_slice = &content[..head_slice_end];
let tail_slice = &content[tail_slice_start..];
let truncated_by_bytes = content.len() > limit_bytes;
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
let marker = if omitted > 0 {
let marker_text = format_truncation_marker(
TruncationSource::LineOmission { total_lines },
u64::try_from(omitted).unwrap_or(u64::MAX),
);
Some(format!("\n{marker_text}\n\n"))
} else if truncated_by_bytes {
let removed_bytes =
u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX);
let marker_text =
format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes);
Some(format!("\n{marker_text}\n\n"))
} else {
None
};
let marker_len = marker.as_ref().map_or(0, String::len);
let base_head_budget = head_bytes.min(limit_bytes);
let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
let mut result = String::with_capacity(limit_bytes.min(content.len()));
result.push_str(head_part);
if let Some(marker_text) = marker.as_ref() {
result.push_str(marker_text);
}
let remaining = limit_bytes.saturating_sub(result.len());
if remaining == 0 {
return result;
}
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
result.push_str(tail_part);
result
}
#[derive(Clone, Copy)]
pub enum TruncationSource {
Policy(TruncationPolicy),
LineOmission { total_lines: usize },
ByteLimit { limit_bytes: usize },
}
fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String {
match source {
TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
format!("[…{removed_count} tokens truncated…]")
}
TruncationSource::Policy(TruncationPolicy::Bytes(_)) => {
format!("[…{removed_count} bytes truncated…]")
}
TruncationSource::LineOmission { total_lines } => {
format!("[... omitted {removed_count} of {total_lines} lines ...]")
}
TruncationSource::ByteLimit { limit_bytes } => {
format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]")
}
fn format_truncation_marker(policy: TruncationPolicy, removed_count: u64) -> String {
match policy {
TruncationPolicy::Tokens(_) => format!("{removed_count} tokens truncated…"),
TruncationPolicy::Bytes(_) => format!("{removed_count} chars truncated…"),
}
}
@ -337,12 +236,14 @@ fn split_budget(budget: usize) -> (usize, usize) {
(left, budget - left)
}
fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 {
match source {
TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
approx_tokens_from_byte_count(removed_bytes)
}
_ => u64::try_from(removed_bytes).unwrap_or(u64::MAX),
fn removed_units_for_source(
policy: TruncationPolicy,
removed_bytes: usize,
removed_chars: usize,
) -> u64 {
match policy {
TruncationPolicy::Tokens(_) => approx_tokens_from_byte_count(removed_bytes),
TruncationPolicy::Bytes(_) => u64::try_from(removed_chars).unwrap_or(u64::MAX),
}
}
@ -350,7 +251,6 @@ fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String
let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1);
out.push_str(prefix);
out.push_str(marker);
out.push('\n');
out.push_str(suffix);
out
}
@ -405,203 +305,130 @@ fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
idx
}
fn error_on_double_truncation(content: &str) {
if content.contains("Total output lines:") && content.contains("omitted") {
tracing::error!(
"FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
);
}
}
#[cfg(test)]
mod tests {
use crate::config::OPENAI_DEFAULT_MODEL;
use crate::model_family::derive_default_model_family;
use crate::model_family::find_family_for_model;
use super::TruncationPolicy;
use super::TruncationSource;
use super::approx_token_count;
use super::formatted_truncate_text;
use super::truncate_function_output_items_with_policy;
use super::truncate_with_line_bytes_budget;
use super::truncate_text;
use super::truncate_with_token_budget;
use codex_protocol::models::FunctionCallOutputContentItem;
use pretty_assertions::assert_eq;
use regex_lite::Regex;
const MODEL_FORMAT_MAX_LINES: usize = 256;
#[test]
fn truncate_bytes_less_than_placeholder_returns_placeholder() {
let content = "example output";
fn model_format_max_bytes() -> usize {
find_family_for_model(OPENAI_DEFAULT_MODEL)
.unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL))
.truncation_policy
.byte_budget()
}
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
let head_lines = MODEL_FORMAT_MAX_LINES / 2;
let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
let head_take = head_lines.min(total_lines);
let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
let omitted = total_lines.saturating_sub(head_take + tail_take);
let escaped_line = regex_lite::escape(line);
if omitted == 0 {
return format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$",
max_bytes = model_format_max_bytes(),
);
}
format!(
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
)
assert_eq!(
"Total output lines: 1\n\n…13 chars truncated…t",
formatted_truncate_text(content, TruncationPolicy::Bytes(1)),
);
}
#[test]
fn truncate_middle_returns_original_when_under_limit() {
fn truncate_tokens_less_than_placeholder_returns_placeholder() {
let content = "example output";
assert_eq!(
"Total output lines: 1\n\nex…3 tokens truncated…ut",
formatted_truncate_text(content, TruncationPolicy::Tokens(1)),
);
}
#[test]
fn truncate_tokens_under_limit_returns_original() {
let content = "example output";
assert_eq!(
content,
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
);
}
#[test]
fn truncate_bytes_under_limit_returns_original() {
let content = "example output";
assert_eq!(
content,
formatted_truncate_text(content, TruncationPolicy::Bytes(20)),
);
}
#[test]
fn truncate_tokens_over_limit_returns_truncated() {
let content = "this is an example of a long output that should be truncated";
assert_eq!(
"Total output lines: 1\n\nthis is an…10 tokens truncated… truncated",
formatted_truncate_text(content, TruncationPolicy::Tokens(5)),
);
}
#[test]
fn truncate_bytes_over_limit_returns_truncated() {
let content = "this is an example of a long output that should be truncated";
assert_eq!(
"Total output lines: 1\n\nthis is an exam…30 chars truncated…ld be truncated",
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
);
}
#[test]
fn truncate_bytes_reports_original_line_count_when_truncated() {
let content =
"this is an example of a long output that should be truncated\nalso some other line";
assert_eq!(
"Total output lines: 2\n\nthis is an exam…51 chars truncated…some other line",
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
);
}
#[test]
fn truncate_tokens_reports_original_line_count_when_truncated() {
let content =
"this is an example of a long output that should be truncated\nalso some other line";
assert_eq!(
"Total output lines: 2\n\nthis is an example o…11 tokens truncated…also some other line",
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
);
}
#[test]
fn truncate_with_token_budget_returns_original_when_under_limit() {
let s = "short output";
let limit = 100;
let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit));
let (out, original) = truncate_with_token_budget(s, limit, source);
let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(limit));
assert_eq!(out, s);
assert_eq!(original, None);
}
#[test]
fn truncate_middle_reports_truncation_at_zero_limit() {
fn truncate_with_token_budget_reports_truncation_at_zero_limit() {
let s = "abcdef";
let source = TruncationSource::Policy(TruncationPolicy::Tokens(0));
let (out, original) = truncate_with_token_budget(s, 0, source);
assert_eq!(out, "[…2 tokens truncated…]");
assert_eq!(original, Some(approx_token_count(s) as u64));
let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(0));
assert_eq!(out, "…2 tokens truncated…");
assert_eq!(original, Some(2));
}
#[test]
fn truncate_middle_enforces_token_budget() {
let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa";
let max_tokens = 12;
let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
let (out, original) = truncate_with_token_budget(s, max_tokens, source);
assert!(out.contains("tokens truncated"));
assert_eq!(original, Some(approx_token_count(s) as u64));
assert!(out.len() < s.len(), "truncated output should be shorter");
}
#[test]
fn truncate_middle_handles_utf8_content() {
fn truncate_middle_tokens_handles_utf8_content() {
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
let max_tokens = 8;
let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
let (out, tokens) = truncate_with_token_budget(s, max_tokens, source);
assert!(out.contains("tokens truncated"));
assert!(!out.contains('\u{fffd}'));
assert_eq!(tokens, Some(approx_token_count(s) as u64));
assert!(out.len() < s.len(), "UTF-8 content should be shortened");
let (out, tokens) = truncate_with_token_budget(s, TruncationPolicy::Tokens(8));
assert_eq!(out, "😀😀😀😀…8 tokens truncated…");
assert_eq!(tokens, Some(16));
}
#[test]
fn format_exec_output_truncates_large_error() {
let line = "very long execution error line that should trigger truncation\n";
let large_error = line.repeat(2_500); // way beyond both byte and line limits
let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes());
let total_lines = large_error.lines().count();
let pattern = truncated_message_pattern(line, total_lines);
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
panic!("failed to compile regex {pattern}: {err}");
});
let captures = regex
.captures(&truncated)
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {truncated}"));
let body = captures
.name("body")
.expect("missing body capture")
.as_str();
assert!(
body.len() <= model_format_max_bytes(),
"body exceeds byte limit: {} bytes",
body.len()
);
assert_ne!(truncated, large_error);
}
#[test]
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
let max_bytes = model_format_max_bytes();
let long_line = "a".repeat(max_bytes + 50);
let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes);
assert_ne!(truncated, long_line);
let removed_bytes = long_line.len().saturating_sub(max_bytes);
let marker_line =
format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]");
assert!(
truncated.contains(&marker_line),
"missing byte truncation marker: {truncated}"
);
assert!(
!truncated.contains("omitted"),
"line omission marker should not appear when no lines were dropped: {truncated}"
);
}
#[test]
fn format_exec_output_returns_original_when_within_limits() {
let content = "example output\n".repeat(10);
assert_eq!(
truncate_with_line_bytes_budget(&content, model_format_max_bytes()),
content
);
}
#[test]
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}\n"))
.collect();
let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
assert!(
truncated.contains(&expected_marker),
"missing omitted marker: {truncated}"
);
assert!(
truncated.contains("line-0\n"),
"expected head line to remain: {truncated}"
);
let last_line = format!("line-{}\n", total_lines - 1);
assert!(
truncated.contains(&last_line),
"expected tail line to remain: {truncated}"
);
}
#[test]
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
let long_line = "x".repeat(256);
let content: String = (0..total_lines)
.map(|idx| format!("line-{idx}-{long_line}\n"))
.collect();
let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
assert!(
truncated.contains("[... omitted 42 of 298 lines ...]"),
"expected omitted marker when line count exceeds limit: {truncated}"
);
assert!(
!truncated.contains("byte limit"),
"line omission marker should take precedence over byte marker: {truncated}"
);
fn truncate_middle_bytes_handles_utf8_content() {
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
let out = truncate_text(s, TruncationPolicy::Bytes(20));
assert_eq!(out, "😀😀…31 chars truncated…");
}
#[test]

View file

@ -15,7 +15,7 @@ use crate::exec::SandboxType;
use crate::exec::StreamOutput;
use crate::exec::is_likely_sandbox_denied;
use crate::truncate::TruncationPolicy;
use crate::truncate::truncate_text;
use crate::truncate::formatted_truncate_text;
use codex_utils_pty::ExecCommandSession;
use codex_utils_pty::SpawnedPty;
@ -167,7 +167,7 @@ impl UnifiedExecSession {
};
if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
let snippet = truncate_text(
let snippet = formatted_truncate_text(
&aggregated_text,
TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS),
);

View file

@ -25,7 +25,7 @@ use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::truncate_text;
use crate::truncate::formatted_truncate_text;
use super::ExecCommandRequest;
use super::SessionEntry;
@ -72,7 +72,7 @@ impl UnifiedExecSessionManager {
let wall_time = Instant::now().saturating_duration_since(start);
let text = String::from_utf8_lossy(&collected).to_string();
let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let chunk_id = generate_chunk_id();
let has_exited = session.has_exited();
let stored_id = self
@ -179,7 +179,7 @@ impl UnifiedExecSessionManager {
let wall_time = Instant::now().saturating_duration_since(start);
let text = String::from_utf8_lossy(&collected).to_string();
let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let original_token_count = approx_token_count(&text);
let chunk_id = generate_chunk_id();

View file

@ -3,6 +3,7 @@ use std::time::Duration;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use crate::codex::TurnContext;
use crate::exec::ExecToolCallOutput;
use crate::tools::format_exec_output_str;
@ -20,7 +21,11 @@ fn format_duration_line(duration: Duration) -> String {
format!("Duration: {duration_seconds:.4} seconds")
}
fn format_user_shell_command_body(command: &str, exec_output: &ExecToolCallOutput) -> String {
fn format_user_shell_command_body(
command: &str,
exec_output: &ExecToolCallOutput,
turn_context: &TurnContext,
) -> String {
let mut sections = Vec::new();
sections.push("<command>".to_string());
sections.push(command.to_string());
@ -29,25 +34,33 @@ fn format_user_shell_command_body(command: &str, exec_output: &ExecToolCallOutpu
sections.push(format!("Exit code: {}", exec_output.exit_code));
sections.push(format_duration_line(exec_output.duration));
sections.push("Output:".to_string());
sections.push(format_exec_output_str(exec_output));
sections.push(format_exec_output_str(
exec_output,
turn_context.truncation_policy,
));
sections.push("</result>".to_string());
sections.join("\n")
}
pub fn format_user_shell_command_record(command: &str, exec_output: &ExecToolCallOutput) -> String {
let body = format_user_shell_command_body(command, exec_output);
pub fn format_user_shell_command_record(
command: &str,
exec_output: &ExecToolCallOutput,
turn_context: &TurnContext,
) -> String {
let body = format_user_shell_command_body(command, exec_output, turn_context);
format!("{USER_SHELL_COMMAND_OPEN}\n{body}\n{USER_SHELL_COMMAND_CLOSE}")
}
pub fn user_shell_command_record_item(
command: &str,
exec_output: &ExecToolCallOutput,
turn_context: &TurnContext,
) -> ResponseItem {
ResponseItem::Message {
id: None,
role: "user".to_string(),
content: vec![ContentItem::InputText {
text: format_user_shell_command_record(command, exec_output),
text: format_user_shell_command_record(command, exec_output, turn_context),
}],
}
}
@ -55,6 +68,7 @@ pub fn user_shell_command_record_item(
#[cfg(test)]
mod tests {
use super::*;
use crate::codex::make_session_and_context;
use crate::exec::StreamOutput;
use pretty_assertions::assert_eq;
@ -76,7 +90,8 @@ mod tests {
duration: Duration::from_secs(1),
timed_out: false,
};
let item = user_shell_command_record_item("echo hi", &exec_output);
let (_, turn_context) = make_session_and_context();
let item = user_shell_command_record_item("echo hi", &exec_output, &turn_context);
let ResponseItem::Message { content, .. } = item else {
panic!("expected message");
};
@ -99,7 +114,8 @@ mod tests {
duration: Duration::from_millis(120),
timed_out: false,
};
let record = format_user_shell_command_record("false", &exec_output);
let (_, turn_context) = make_session_and_context();
let record = format_user_shell_command_record("false", &exec_output, &turn_context);
assert_eq!(
record,
"<user_shell_command>\n<command>\nfalse\n</command>\n<result>\nExit code: 42\nDuration: 0.1200 seconds\nOutput:\ncombined output wins\n</result>\n</user_shell_command>"

View file

@ -407,7 +407,6 @@ $"#;
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[test_case(ShellModelOutput::Shell)]
#[test_case(ShellModelOutput::ShellCommand)]
#[test_case(ShellModelOutput::LocalShell)]
async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
skip_if_no_network!(Ok(()));
@ -417,6 +416,7 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
config.tool_output_token_limit = Some(200);
if matches!(output_type, ShellModelOutput::ShellCommand) {
config.features.enable(Feature::ShellCommandTool);
}
@ -457,10 +457,7 @@ Output:
4
5
6
.*
\[\.{3} omitted \d+ of 400 lines \.{3}\]
.*
.*45 tokens truncated.*
396
397
398
@ -858,7 +855,7 @@ async fn shell_command_output_is_not_truncated_under_10k_bytes() -> Result<()> {
let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Output:
1{5000}$"; // TODO: this is very wrong!!!
1{10000}$";
assert_regex_match(expected_pattern, output);
Ok(())
@ -911,12 +908,8 @@ async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> {
let expected_pattern = r"(?s)^Exit code: 0
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Total output lines: 1
Output:
1*
\[... removed 1 bytes to fit 10000 byte limit ...\]
$";
1*1 chars truncated1*$";
assert_regex_match(expected_pattern, output);
Ok(())

View file

@ -101,17 +101,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
// Verifies that a standard tool call (shell) exceeding the model formatting
// limits is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
config.tool_output_token_limit = Some(100_000);
});
let fixture = builder.build(&server).await?;
let call_id = "shell-too-large";
@ -120,13 +119,13 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 400; $i++) { Write-Output $i }"
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 400"],
"command": ["/bin/sh", "-c", "seq 1 100000"],
"timeout_ms": 5_000,
})
};
@ -162,14 +161,169 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
.context("function_call_output present for shell call")?;
let output = output.replace("\r\n", "\n");
// Expect plain text (not JSON) with truncation markers and line elision.
// Expect plain text (not JSON) containing the entire shell output.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated shell output to be plain text"
);
assert_eq!(output.len(), 400094, "we should be almost 100k tokens");
assert!(
!output.contains("tokens truncated"),
"shell output should not contain tokens truncated marker: {output}"
);
Ok(())
}
// Verifies that a standard tool call (shell) exceeding the model formatting
// limits is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_model("gpt-5.1");
let fixture = builder.build(&server).await?;
let call_id = "shell-too-large";
let args = if cfg!(windows) {
serde_json::json!({
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 100000"],
"timeout_ms": 5_000,
})
};
// First response: model tells us to run the tool; second: complete the turn.
mount_sse_once(
&server,
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once(
&server,
sse(vec![
responses::ev_assistant_message("msg-1", "done"),
responses::ev_completed("resp-2"),
]),
)
.await;
fixture
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
.await?;
// Inspect what we sent back to the model; it should contain a truncated
// function_call_output for the shell call.
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for shell call")?;
let output = output.replace("\r\n", "\n");
// Expect plain text (not JSON) containing the entire shell output.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated shell output to be plain text"
);
assert_eq!(output.len(), 9976); // ~10k characters
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\n.*?…578898 chars truncated….*$"#;
assert_regex_match(truncated_pattern, &output);
Ok(())
}
// Verifies that a standard tool call (shell) exceeding the model formatting
// limits is truncated before being sent back to the model.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
// Use a model that exposes the generic shell tool.
let mut builder = test_codex().with_config(|config| {
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
});
let fixture = builder.build(&server).await?;
let call_id = "shell-too-large";
let args = if cfg!(windows) {
serde_json::json!({
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 100000"],
"timeout_ms": 5_000,
})
};
// First response: model tells us to run the tool; second: complete the turn.
mount_sse_once(
&server,
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once(
&server,
sse(vec![
responses::ev_assistant_message("msg-1", "done"),
responses::ev_completed("resp-2"),
]),
)
.await;
fixture
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
.await?;
// Inspect what we sent back to the model; it should contain a truncated
// function_call_output for the shell call.
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for shell call")?;
let output = output.replace("\r\n", "\n");
// Expect plain text (not JSON) containing the entire shell output.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated shell output to be plain text"
);
let truncated_pattern = r#"(?s)^Exit code: 0
Wall time: .* seconds
Total output lines: 400
Wall time: [0-9]+(?:\.[0-9]+)? seconds
Total output lines: 100000
Output:
1
2
@ -177,15 +331,9 @@ Output:
4
5
6
.*
\[\.{3} omitted 144 of 400 lines \.{3}\]
.*
396
397
398
399
400
.*137224 tokens truncated.*
99999
100000
$"#;
assert_regex_match(truncated_pattern, &output);
@ -211,13 +359,13 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
"command": [
"powershell",
"-Command",
"for ($i=1; $i -le 2000; $i++) { Write-Output $i }"
"for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
],
"timeout_ms": 5_000,
})
} else {
serde_json::json!({
"command": ["/bin/sh", "-c", "seq 1 2000"],
"command": ["/bin/sh", "-c", "seq 1 10000"],
"timeout_ms": 5_000,
})
};
@ -249,11 +397,11 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
.function_call_output_text(call_id)
.context("function_call_output present for shell call")?;
let total_line_headers = output.matches("Total output lines:").count();
let truncation_markers = output.matches("tokens truncated").count();
assert_eq!(
total_line_headers, 1,
"shell output should carry only one truncation header: {output}"
truncation_markers, 1,
"shell output should carry only one truncation marker: {output}"
);
Ok(())
@ -321,6 +469,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
disabled_tools: None,
},
);
config.tool_output_token_limit = Some(500);
});
let fixture = builder.build(&server).await?;
@ -337,12 +486,6 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
.function_call_output_text(call_id)
.context("function_call_output present for rmcp call")?;
// Expect plain text with token-based truncation marker; the original JSON body
// is truncated in the middle of the echo string.
assert!(
serde_json::from_str::<Value>(&output).is_err(),
"expected truncated MCP output to be plain text"
);
assert!(
!output.contains("Total output lines:"),
"MCP output should not include line-based truncation header: {output}"
@ -350,6 +493,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
assert_regex_match(truncated_pattern, &output);
assert!(output.len() < 2500, "{}", output.len());
Ok(())
}
@ -501,7 +645,9 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output);
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5\\n.*?…\d+ tokens truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
assert_regex_match(pattern, &output);
Ok(())
}
@ -552,14 +698,16 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output);
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5.*?…\d+ chars truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
assert_regex_match(pattern, &output);
Ok(())
}
// Overriding config with a large token budget should avoid truncation.
// Shell tool output should remain intact when the config opts into a large token budget.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn large_budget_avoids_truncation() -> Result<()> {
async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
@ -576,6 +724,7 @@ async fn large_budget_avoids_truncation() -> Result<()> {
"command": ["/bin/sh", "-c", "seq 1 1000"],
"timeout_ms": 5_000,
});
let expected_body: String = (1..=1000).map(|i| format!("{i}\n")).collect();
mount_sse_once(
&server,
@ -607,6 +756,10 @@ async fn large_budget_avoids_truncation() -> Result<()> {
.function_call_output_text(call_id)
.context("shell output present")?;
assert!(
output.ends_with(&expected_body),
"expected entire shell output when budget increased: {output}"
);
assert!(
!output.contains("truncated"),
"output should remain untruncated with ample budget"
@ -614,3 +767,101 @@ async fn large_budget_avoids_truncation() -> Result<()> {
Ok(())
}
// MCP server output should also remain intact when the config increases the token limit.
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
async fn mcp_tool_call_output_not_truncated_with_custom_limit() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = start_mock_server().await;
let call_id = "rmcp-untruncated";
let server_name = "rmcp";
let tool_name = format!("mcp__{server_name}__echo");
let large_msg = "a".repeat(80_000);
let args_json = serde_json::json!({ "message": large_msg });
mount_sse_once(
&server,
sse(vec![
responses::ev_response_created("resp-1"),
responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
responses::ev_completed("resp-1"),
]),
)
.await;
let mock2 = mount_sse_once(
&server,
sse(vec![
responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
responses::ev_completed("resp-2"),
]),
)
.await;
let rmcp_test_server_bin = CargoBuild::new()
.package("codex-rmcp-client")
.bin("test_stdio_server")
.run()?
.path()
.to_string_lossy()
.into_owned();
let mut builder = test_codex().with_config(move |config| {
config.features.enable(Feature::RmcpClient);
config.tool_output_token_limit = Some(50_000);
config.mcp_servers.insert(
server_name.to_string(),
codex_core::config::types::McpServerConfig {
transport: codex_core::config::types::McpServerTransportConfig::Stdio {
command: rmcp_test_server_bin,
args: Vec::new(),
env: None,
env_vars: Vec::new(),
cwd: None,
},
enabled: true,
startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
},
);
});
let fixture = builder.build(&server).await?;
fixture
.submit_turn_with_policy(
"call the rmcp echo tool with a very large message",
SandboxPolicy::ReadOnly,
)
.await?;
let output = mock2
.single_request()
.function_call_output_text(call_id)
.context("function_call_output present for rmcp call")?;
let parsed: Value = serde_json::from_str(&output)?;
assert_eq!(
output.len(),
80031,
"parsed MCP output should retain its serialized length"
);
let expected_echo = format!("ECHOING: {large_msg}");
let echo_str = parsed["echo"]
.as_str()
.context("echo field should be a string in rmcp echo output")?;
assert_eq!(
echo_str.len(),
expected_echo.len(),
"echo length should match"
);
assert_eq!(echo_str, expected_echo);
assert!(
!output.contains("truncated"),
"output should not include truncation markers when limit is raised: {output}"
);
Ok(())
}

View file

@ -1586,8 +1586,7 @@ PY
let large_output = outputs.get(call_id).expect("missing large output summary");
let output_text = large_output.output.replace("\r\n", "\n");
let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}"#;
let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated…(token token \n){5,}$";
assert_regex_match(truncated_pattern, &output_text);
let original_tokens = large_output

View file

@ -207,10 +207,16 @@ async fn user_shell_command_history_is_persisted_and_shared_with_model() -> anyh
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
#[cfg(not(target_os = "windows"))] // TODO: unignore on windows
async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<()> {
let server = responses::start_mock_server().await;
let mut builder = core_test_support::test_codex::test_codex();
let test = builder.build(&server).await?;
let builder = core_test_support::test_codex::test_codex();
let test = builder
.with_config(|config| {
config.tool_output_token_limit = Some(100);
})
.build(&server)
.await?;
#[cfg(windows)]
let command = r#"for ($i=1; $i -le 400; $i++) { Write-Output $i }"#.to_string();
@ -249,10 +255,9 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(
.expect("command message recorded in request");
let command_message = command_message.replace("\r\n", "\n");
let head = (1..=128).map(|i| format!("{i}\n")).collect::<String>();
let tail = (273..=400).map(|i| format!("{i}\n")).collect::<String>();
let truncated_body =
format!("Total output lines: 400\n\n{head}\n[... omitted 144 of 400 lines ...]\n\n{tail}");
let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
let truncated_body = format!("Total output lines: 400\n\n{head}…273 tokens truncated…{tail}");
let escaped_command = escape(&command);
let escaped_truncated_body = escape(&truncated_body);
let expected_pattern = format!(
@ -270,6 +275,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
let server = start_mock_server().await;
let mut builder = test_codex().with_config(|config| {
config.tool_output_token_limit = Some(100);
config.model = "gpt-5.1-codex".to_string();
config.model_family =
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");