Move shell to use truncate_text (#6842)
Move shell to use the configurable `truncate_text` --------- Co-authored-by: pakrym-oai <pakrym@openai.com>
This commit is contained in:
parent
75f38f16dd
commit
efebc62fb7
16 changed files with 583 additions and 498 deletions
|
|
@ -165,11 +165,9 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
|
|||
));
|
||||
|
||||
let mut output = parsed.output.clone();
|
||||
if let Some(total_lines) = extract_total_output_lines(&parsed.output) {
|
||||
if let Some((stripped, total_lines)) = strip_total_output_header(&parsed.output) {
|
||||
sections.push(format!("Total output lines: {total_lines}"));
|
||||
if let Some(stripped) = strip_total_output_header(&output) {
|
||||
output = stripped.to_string();
|
||||
}
|
||||
output = stripped.to_string();
|
||||
}
|
||||
|
||||
sections.push("Output:".to_string());
|
||||
|
|
@ -178,19 +176,12 @@ fn build_structured_output(parsed: &ExecOutputJson) -> String {
|
|||
sections.join("\n")
|
||||
}
|
||||
|
||||
fn extract_total_output_lines(output: &str) -> Option<u32> {
|
||||
let marker_start = output.find("[... omitted ")?;
|
||||
let marker = &output[marker_start..];
|
||||
let (_, after_of) = marker.split_once(" of ")?;
|
||||
let (total_segment, _) = after_of.split_once(' ')?;
|
||||
total_segment.parse::<u32>().ok()
|
||||
}
|
||||
|
||||
fn strip_total_output_header(output: &str) -> Option<&str> {
|
||||
fn strip_total_output_header(output: &str) -> Option<(&str, u32)> {
|
||||
let after_prefix = output.strip_prefix("Total output lines: ")?;
|
||||
let (_, remainder) = after_prefix.split_once('\n')?;
|
||||
let (total_segment, remainder) = after_prefix.split_once('\n')?;
|
||||
let total_lines = total_segment.parse::<u32>().ok()?;
|
||||
let remainder = remainder.strip_prefix('\n').unwrap_or(remainder);
|
||||
Some(remainder)
|
||||
Some((remainder, total_lines))
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
|
|||
|
|
@ -2479,8 +2479,9 @@ mod tests {
|
|||
duration: StdDuration::from_secs(1),
|
||||
timed_out: true,
|
||||
};
|
||||
let (_, turn_context) = make_session_and_context();
|
||||
|
||||
let out = format_exec_output_str(&exec);
|
||||
let out = format_exec_output_str(&exec, turn_context.truncation_policy);
|
||||
|
||||
assert_eq!(
|
||||
out,
|
||||
|
|
|
|||
|
|
@ -145,13 +145,17 @@ impl ContextManager {
|
|||
}
|
||||
|
||||
fn process_item(&self, item: &ResponseItem, policy: TruncationPolicy) -> ResponseItem {
|
||||
let policy_with_serialization_budget = policy.mul(1.2);
|
||||
match item {
|
||||
ResponseItem::FunctionCallOutput { call_id, output } => {
|
||||
let truncated = truncate_text(output.content.as_str(), policy);
|
||||
let truncated_items = output
|
||||
.content_items
|
||||
.as_ref()
|
||||
.map(|items| truncate_function_output_items_with_policy(items, policy));
|
||||
let truncated =
|
||||
truncate_text(output.content.as_str(), policy_with_serialization_budget);
|
||||
let truncated_items = output.content_items.as_ref().map(|items| {
|
||||
truncate_function_output_items_with_policy(
|
||||
items,
|
||||
policy_with_serialization_budget,
|
||||
)
|
||||
});
|
||||
ResponseItem::FunctionCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: FunctionCallOutputPayload {
|
||||
|
|
@ -162,7 +166,7 @@ impl ContextManager {
|
|||
}
|
||||
}
|
||||
ResponseItem::CustomToolCallOutput { call_id, output } => {
|
||||
let truncated = truncate_text(output, policy);
|
||||
let truncated = truncate_text(output, policy_with_serialization_budget);
|
||||
ResponseItem::CustomToolCallOutput {
|
||||
call_id: call_id.clone(),
|
||||
output: truncated,
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ use codex_protocol::models::ReasoningItemReasoningSummary;
|
|||
use pretty_assertions::assert_eq;
|
||||
use regex_lite::Regex;
|
||||
|
||||
const EXEC_FORMAT_MAX_LINES: usize = 256;
|
||||
const EXEC_FORMAT_MAX_BYTES: usize = 10_000;
|
||||
const EXEC_FORMAT_MAX_TOKENS: usize = 2_500;
|
||||
|
||||
fn assistant_msg(text: &str) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
|
|
@ -56,6 +56,10 @@ fn reasoning_msg(text: &str) -> ResponseItem {
|
|||
}
|
||||
}
|
||||
|
||||
fn truncate_exec_output(content: &str) -> String {
|
||||
truncate::truncate_text(content, TruncationPolicy::Tokens(EXEC_FORMAT_MAX_TOKENS))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filters_non_api_messages() {
|
||||
let mut h = ContextManager::default();
|
||||
|
|
@ -228,7 +232,7 @@ fn normalization_retains_local_shell_outputs() {
|
|||
ResponseItem::FunctionCallOutput {
|
||||
call_id: "shell-1".to_string(),
|
||||
output: FunctionCallOutputPayload {
|
||||
content: "ok".to_string(),
|
||||
content: "Total output lines: 1\n\nok".to_string(),
|
||||
..Default::default()
|
||||
},
|
||||
},
|
||||
|
|
@ -330,8 +334,8 @@ fn record_items_respects_custom_token_limit() {
|
|||
assert!(stored.content.contains("tokens truncated"));
|
||||
}
|
||||
|
||||
fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usize) {
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
fn assert_truncated_message_matches(message: &str, line: &str, expected_removed: usize) {
|
||||
let pattern = truncated_message_pattern(line);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
|
||||
panic!("failed to compile regex {pattern}: {err}");
|
||||
});
|
||||
|
|
@ -347,23 +351,18 @@ fn assert_truncated_message_matches(message: &str, line: &str, total_lines: usiz
|
|||
"body exceeds byte limit: {} bytes",
|
||||
body.len()
|
||||
);
|
||||
let removed: usize = captures
|
||||
.name("removed")
|
||||
.expect("missing removed capture")
|
||||
.as_str()
|
||||
.parse()
|
||||
.unwrap_or_else(|err| panic!("invalid removed tokens: {err}"));
|
||||
assert_eq!(removed, expected_removed, "mismatched removed token count");
|
||||
}
|
||||
|
||||
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
|
||||
let head_lines = EXEC_FORMAT_MAX_LINES / 2;
|
||||
let tail_lines = EXEC_FORMAT_MAX_LINES - head_lines;
|
||||
let head_take = head_lines.min(total_lines);
|
||||
let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
|
||||
let omitted = total_lines.saturating_sub(head_take + tail_take);
|
||||
fn truncated_message_pattern(line: &str) -> String {
|
||||
let escaped_line = regex_lite::escape(line);
|
||||
if omitted == 0 {
|
||||
return format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit \.{{3}}]\n\n.*)$",
|
||||
);
|
||||
}
|
||||
format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
|
||||
)
|
||||
format!(r"(?s)^(?P<body>{escaped_line}.*?)(?:\r?)?…(?P<removed>\d+) tokens truncated…(?:.*)?$")
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
@ -371,27 +370,18 @@ fn format_exec_output_truncates_large_error() {
|
|||
let line = "very long execution error line that should trigger truncation\n";
|
||||
let large_error = line.repeat(2_500); // way beyond both byte and line limits
|
||||
|
||||
let truncated = truncate::truncate_with_line_bytes_budget(&large_error, EXEC_FORMAT_MAX_BYTES);
|
||||
let truncated = truncate_exec_output(&large_error);
|
||||
|
||||
let total_lines = large_error.lines().count();
|
||||
assert_truncated_message_matches(&truncated, line, total_lines);
|
||||
assert_truncated_message_matches(&truncated, line, 36250);
|
||||
assert_ne!(truncated, large_error);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
|
||||
let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 50);
|
||||
let truncated = truncate::truncate_with_line_bytes_budget(&long_line, EXEC_FORMAT_MAX_BYTES);
|
||||
|
||||
let long_line = "a".repeat(EXEC_FORMAT_MAX_BYTES + 10000);
|
||||
let truncated = truncate_exec_output(&long_line);
|
||||
assert_ne!(truncated, long_line);
|
||||
let removed_bytes = long_line.len().saturating_sub(EXEC_FORMAT_MAX_BYTES);
|
||||
let marker_line = format!(
|
||||
"[... removed {removed_bytes} bytes to fit {EXEC_FORMAT_MAX_BYTES} byte limit ...]"
|
||||
);
|
||||
assert!(
|
||||
truncated.contains(&marker_line),
|
||||
"missing byte truncation marker: {truncated}"
|
||||
);
|
||||
assert_truncated_message_matches(&truncated, "a", 2500);
|
||||
assert!(
|
||||
!truncated.contains("omitted"),
|
||||
"line omission marker should not appear when no lines were dropped: {truncated}"
|
||||
|
|
@ -401,34 +391,25 @@ fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
|
|||
#[test]
|
||||
fn format_exec_output_returns_original_when_within_limits() {
|
||||
let content = "example output\n".repeat(10);
|
||||
|
||||
assert_eq!(
|
||||
truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES),
|
||||
content
|
||||
);
|
||||
assert_eq!(truncate_exec_output(&content), content);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
|
||||
let total_lines = EXEC_FORMAT_MAX_LINES + 100;
|
||||
let total_lines = 2_000;
|
||||
let filler = "x".repeat(64);
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}\n"))
|
||||
.map(|idx| format!("line-{idx}-{filler}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
|
||||
let omitted = total_lines - EXEC_FORMAT_MAX_LINES;
|
||||
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
|
||||
|
||||
let truncated = truncate_exec_output(&content);
|
||||
assert_truncated_message_matches(&truncated, "line-0-", 34_723);
|
||||
assert!(
|
||||
truncated.contains(&expected_marker),
|
||||
"missing omitted marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
truncated.contains("line-0\n"),
|
||||
truncated.contains("line-0-"),
|
||||
"expected head line to remain: {truncated}"
|
||||
);
|
||||
|
||||
let last_line = format!("line-{}\n", total_lines - 1);
|
||||
let last_line = format!("line-{}-", total_lines - 1);
|
||||
assert!(
|
||||
truncated.contains(&last_line),
|
||||
"expected tail line to remain: {truncated}"
|
||||
|
|
@ -437,22 +418,15 @@ fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
|
|||
|
||||
#[test]
|
||||
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
|
||||
let total_lines = EXEC_FORMAT_MAX_LINES + 42;
|
||||
let total_lines = 300;
|
||||
let long_line = "x".repeat(256);
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}-{long_line}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = truncate::truncate_with_line_bytes_budget(&content, EXEC_FORMAT_MAX_BYTES);
|
||||
let truncated = truncate_exec_output(&content);
|
||||
|
||||
assert!(
|
||||
truncated.contains("[... omitted 42 of 298 lines ...]"),
|
||||
"expected omitted marker when line count exceeds limit: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("byte limit"),
|
||||
"line omission marker should take precedence over byte marker: {truncated}"
|
||||
);
|
||||
assert_truncated_message_matches(&truncated, "line-0-", 17_423);
|
||||
}
|
||||
|
||||
//TODO(aibrahim): run CI in release mode.
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
mod history;
|
||||
mod normalize;
|
||||
|
||||
pub(crate) use crate::truncate::truncate_with_line_bytes_budget;
|
||||
pub(crate) use history::ContextManager;
|
||||
|
|
|
|||
|
|
@ -122,7 +122,11 @@ impl SessionTask for UserShellCommandTask {
|
|||
duration: Duration::ZERO,
|
||||
timed_out: false,
|
||||
};
|
||||
let output_items = [user_shell_command_record_item(&raw_command, &exec_output)];
|
||||
let output_items = [user_shell_command_record_item(
|
||||
&raw_command,
|
||||
&exec_output,
|
||||
&turn_context,
|
||||
)];
|
||||
session
|
||||
.record_conversation_items(turn_context.as_ref(), &output_items)
|
||||
.await;
|
||||
|
|
@ -164,12 +168,19 @@ impl SessionTask for UserShellCommandTask {
|
|||
aggregated_output: output.aggregated_output.text.clone(),
|
||||
exit_code: output.exit_code,
|
||||
duration: output.duration,
|
||||
formatted_output: format_exec_output_str(&output),
|
||||
formatted_output: format_exec_output_str(
|
||||
&output,
|
||||
turn_context.truncation_policy,
|
||||
),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
|
||||
let output_items = [user_shell_command_record_item(&raw_command, &output)];
|
||||
let output_items = [user_shell_command_record_item(
|
||||
&raw_command,
|
||||
&output,
|
||||
&turn_context,
|
||||
)];
|
||||
session
|
||||
.record_conversation_items(turn_context.as_ref(), &output_items)
|
||||
.await;
|
||||
|
|
@ -201,11 +212,18 @@ impl SessionTask for UserShellCommandTask {
|
|||
aggregated_output: exec_output.aggregated_output.text.clone(),
|
||||
exit_code: exec_output.exit_code,
|
||||
duration: exec_output.duration,
|
||||
formatted_output: format_exec_output_str(&exec_output),
|
||||
formatted_output: format_exec_output_str(
|
||||
&exec_output,
|
||||
turn_context.truncation_policy,
|
||||
),
|
||||
}),
|
||||
)
|
||||
.await;
|
||||
let output_items = [user_shell_command_record_item(&raw_command, &exec_output)];
|
||||
let output_items = [user_shell_command_record_item(
|
||||
&raw_command,
|
||||
&exec_output,
|
||||
&turn_context,
|
||||
)];
|
||||
session
|
||||
.record_conversation_items(turn_context.as_ref(), &output_items)
|
||||
.await;
|
||||
|
|
|
|||
|
|
@ -242,12 +242,16 @@ impl ToolEmitter {
|
|||
self.emit(ctx, ToolEventStage::Begin).await;
|
||||
}
|
||||
|
||||
fn format_exec_output_for_model(&self, output: &ExecToolCallOutput) -> String {
|
||||
fn format_exec_output_for_model(
|
||||
&self,
|
||||
output: &ExecToolCallOutput,
|
||||
ctx: ToolEventCtx<'_>,
|
||||
) -> String {
|
||||
match self {
|
||||
Self::Shell { freeform: true, .. } => {
|
||||
super::format_exec_output_for_model_freeform(output)
|
||||
super::format_exec_output_for_model_freeform(output, ctx.turn.truncation_policy)
|
||||
}
|
||||
_ => super::format_exec_output_for_model_structured(output),
|
||||
_ => super::format_exec_output_for_model_structured(output, ctx.turn.truncation_policy),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -258,7 +262,7 @@ impl ToolEmitter {
|
|||
) -> Result<String, FunctionCallError> {
|
||||
let (event, result) = match out {
|
||||
Ok(output) => {
|
||||
let content = self.format_exec_output_for_model(&output);
|
||||
let content = self.format_exec_output_for_model(&output, ctx);
|
||||
let exit_code = output.exit_code;
|
||||
let event = ToolEventStage::Success(output);
|
||||
let result = if exit_code == 0 {
|
||||
|
|
@ -270,7 +274,7 @@ impl ToolEmitter {
|
|||
}
|
||||
Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Timeout { output })))
|
||||
| Err(ToolError::Codex(CodexErr::Sandbox(SandboxErr::Denied { output }))) => {
|
||||
let response = self.format_exec_output_for_model(&output);
|
||||
let response = self.format_exec_output_for_model(&output, ctx);
|
||||
let event = ToolEventStage::Failure(ToolEventFailure::Output(*output));
|
||||
let result = Err(FunctionCallError::RespondToModel(response));
|
||||
(event, result)
|
||||
|
|
@ -359,7 +363,7 @@ async fn emit_exec_stage(
|
|||
aggregated_output: output.aggregated_output.text.clone(),
|
||||
exit_code: output.exit_code,
|
||||
duration: output.duration,
|
||||
formatted_output: format_exec_output_str(&output),
|
||||
formatted_output: format_exec_output_str(&output, ctx.turn.truncation_policy),
|
||||
};
|
||||
emit_exec_end(ctx, exec_input, exec_result).await;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,9 +9,10 @@ pub mod runtimes;
|
|||
pub mod sandboxing;
|
||||
pub mod spec;
|
||||
|
||||
use crate::context_manager::truncate_with_line_bytes_budget;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::truncate::truncate_formatted_exec_output;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
use crate::truncate::truncate_text;
|
||||
pub use router::ToolRouter;
|
||||
use serde::Serialize;
|
||||
|
||||
|
|
@ -21,12 +22,12 @@ pub(crate) const TELEMETRY_PREVIEW_MAX_LINES: usize = 64; // lines
|
|||
pub(crate) const TELEMETRY_PREVIEW_TRUNCATION_NOTICE: &str =
|
||||
"[... telemetry preview truncated ...]";
|
||||
|
||||
// TODO(aibrahim): migrate shell tool to use truncate text and respect config value
|
||||
const SHELL_OUTPUT_MAX_BYTES: usize = 10_000;
|
||||
|
||||
/// Format the combined exec output for sending back to the model.
|
||||
/// Includes exit code and duration metadata; truncates large bodies safely.
|
||||
pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput) -> String {
|
||||
pub fn format_exec_output_for_model_structured(
|
||||
exec_output: &ExecToolCallOutput,
|
||||
truncation_policy: TruncationPolicy,
|
||||
) -> String {
|
||||
let ExecToolCallOutput {
|
||||
exit_code,
|
||||
duration,
|
||||
|
|
@ -48,7 +49,7 @@ pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput)
|
|||
// round to 1 decimal place
|
||||
let duration_seconds = ((duration.as_secs_f32()) * 10.0).round() / 10.0;
|
||||
|
||||
let formatted_output = format_exec_output_str(exec_output);
|
||||
let formatted_output = format_exec_output_str(exec_output, truncation_policy);
|
||||
|
||||
let payload = ExecOutput {
|
||||
output: &formatted_output,
|
||||
|
|
@ -62,18 +63,16 @@ pub fn format_exec_output_for_model_structured(exec_output: &ExecToolCallOutput)
|
|||
serde_json::to_string(&payload).expect("serialize ExecOutput")
|
||||
}
|
||||
|
||||
pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -> String {
|
||||
pub fn format_exec_output_for_model_freeform(
|
||||
exec_output: &ExecToolCallOutput,
|
||||
truncation_policy: TruncationPolicy,
|
||||
) -> String {
|
||||
// round to 1 decimal place
|
||||
let duration_seconds = ((exec_output.duration.as_secs_f32()) * 10.0).round() / 10.0;
|
||||
|
||||
let total_lines = exec_output.aggregated_output.text.lines().count();
|
||||
|
||||
let formatted_output = truncate_formatted_exec_output(
|
||||
&exec_output.aggregated_output.text,
|
||||
total_lines,
|
||||
SHELL_OUTPUT_MAX_BYTES,
|
||||
256, // TODO: to be removed
|
||||
);
|
||||
let formatted_output = truncate_text(&exec_output.aggregated_output.text, truncation_policy);
|
||||
|
||||
let mut sections = Vec::new();
|
||||
|
||||
|
|
@ -89,7 +88,10 @@ pub fn format_exec_output_for_model_freeform(exec_output: &ExecToolCallOutput) -
|
|||
sections.join("\n")
|
||||
}
|
||||
|
||||
pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
||||
pub fn format_exec_output_str(
|
||||
exec_output: &ExecToolCallOutput,
|
||||
truncation_policy: TruncationPolicy,
|
||||
) -> String {
|
||||
let ExecToolCallOutput {
|
||||
aggregated_output, ..
|
||||
} = exec_output;
|
||||
|
|
@ -106,5 +108,5 @@ pub fn format_exec_output_str(exec_output: &ExecToolCallOutput) -> String {
|
|||
};
|
||||
|
||||
// Truncate for model consumption before serialization.
|
||||
truncate_with_line_bytes_budget(&body, SHELL_OUTPUT_MAX_BYTES)
|
||||
formatted_truncate_text(&body, truncation_policy)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,11 +2,8 @@
|
|||
//! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation
|
||||
//! used across the core crate.
|
||||
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use codex_utils_string::take_last_bytes_at_char_boundary;
|
||||
|
||||
use crate::config::Config;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
|
||||
const APPROX_BYTES_PER_TOKEN: usize = 4;
|
||||
|
||||
|
|
@ -17,6 +14,18 @@ pub enum TruncationPolicy {
|
|||
}
|
||||
|
||||
impl TruncationPolicy {
|
||||
/// Scale the underlying budget by `multiplier`, rounding up to avoid under-budgeting.
|
||||
pub fn mul(self, multiplier: f64) -> Self {
|
||||
match self {
|
||||
TruncationPolicy::Bytes(bytes) => {
|
||||
TruncationPolicy::Bytes((bytes as f64 * multiplier).ceil() as usize)
|
||||
}
|
||||
TruncationPolicy::Tokens(tokens) => {
|
||||
TruncationPolicy::Tokens((tokens as f64 * multiplier).ceil() as usize)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn new(config: &Config) -> Self {
|
||||
let config_token_limit = config.tool_output_token_limit;
|
||||
|
||||
|
|
@ -65,34 +74,20 @@ impl TruncationPolicy {
|
|||
}
|
||||
}
|
||||
|
||||
/// Format a block of exec/tool output for model consumption, truncating by
|
||||
/// lines and bytes while preserving head and tail segments.
|
||||
pub(crate) fn truncate_with_line_bytes_budget(content: &str, bytes_budget: usize) -> String {
|
||||
// TODO(aibrahim): to be removed
|
||||
let lines_budget = 256;
|
||||
// Head+tail truncation for the model: show the beginning and end with an elision.
|
||||
// Clients still receive full streams; only this formatted summary is capped.
|
||||
let total_lines = content.lines().count();
|
||||
if content.len() <= bytes_budget && total_lines <= lines_budget {
|
||||
pub(crate) fn formatted_truncate_text(content: &str, policy: TruncationPolicy) -> String {
|
||||
if content.len() <= policy.byte_budget() {
|
||||
return content.to_string();
|
||||
}
|
||||
let output = truncate_formatted_exec_output(content, total_lines, bytes_budget, lines_budget);
|
||||
format!("Total output lines: {total_lines}\n\n{output}")
|
||||
let total_lines = content.lines().count();
|
||||
let result = truncate_text(content, policy);
|
||||
format!("Total output lines: {total_lines}\n\n{result}")
|
||||
}
|
||||
|
||||
pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
|
||||
match policy {
|
||||
TruncationPolicy::Bytes(bytes) => truncate_with_byte_estimate(
|
||||
content,
|
||||
bytes,
|
||||
TruncationSource::Policy(TruncationPolicy::Bytes(bytes)),
|
||||
),
|
||||
TruncationPolicy::Tokens(tokens) => {
|
||||
let (truncated, _) = truncate_with_token_budget(
|
||||
content,
|
||||
tokens,
|
||||
TruncationSource::Policy(TruncationPolicy::Tokens(tokens)),
|
||||
);
|
||||
TruncationPolicy::Bytes(_) => truncate_with_byte_estimate(content, policy),
|
||||
TruncationPolicy::Tokens(_) => {
|
||||
let (truncated, _) = truncate_with_token_budget(content, policy);
|
||||
truncated
|
||||
}
|
||||
}
|
||||
|
|
@ -162,24 +157,18 @@ pub(crate) fn truncate_function_output_items_with_policy(
|
|||
/// preserving the beginning and the end. Returns the possibly truncated string
|
||||
/// and `Some(original_token_count)` if truncation occurred; otherwise returns
|
||||
/// the original string and `None`.
|
||||
fn truncate_with_token_budget(
|
||||
s: &str,
|
||||
max_tokens: usize,
|
||||
source: TruncationSource,
|
||||
) -> (String, Option<u64>) {
|
||||
fn truncate_with_token_budget(s: &str, policy: TruncationPolicy) -> (String, Option<u64>) {
|
||||
if s.is_empty() {
|
||||
return (String::new(), None);
|
||||
}
|
||||
let max_tokens = policy.token_budget();
|
||||
|
||||
let byte_len = s.len();
|
||||
if max_tokens > 0 {
|
||||
let small_threshold = approx_bytes_for_tokens(max_tokens / 4);
|
||||
if small_threshold > 0 && byte_len <= small_threshold {
|
||||
return (s.to_string(), None);
|
||||
}
|
||||
if max_tokens > 0 && byte_len <= approx_bytes_for_tokens(max_tokens) {
|
||||
return (s.to_string(), None);
|
||||
}
|
||||
|
||||
let truncated = truncate_with_byte_estimate(s, approx_bytes_for_tokens(max_tokens), source);
|
||||
let truncated = truncate_with_byte_estimate(s, policy);
|
||||
let approx_total_usize = approx_token_count(s);
|
||||
let approx_total = u64::try_from(approx_total_usize).unwrap_or(u64::MAX);
|
||||
if truncated == s {
|
||||
|
|
@ -192,14 +181,19 @@ fn truncate_with_token_budget(
|
|||
/// Truncate a string using a byte budget derived from the token budget, without
|
||||
/// performing any real tokenization. This keeps the logic purely byte-based and
|
||||
/// uses a bytes placeholder in the truncated output.
|
||||
fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSource) -> String {
|
||||
fn truncate_with_byte_estimate(s: &str, policy: TruncationPolicy) -> String {
|
||||
if s.is_empty() {
|
||||
return String::new();
|
||||
}
|
||||
let total_chars = s.chars().count();
|
||||
let max_bytes = policy.byte_budget();
|
||||
|
||||
if max_bytes == 0 {
|
||||
// No budget to show content; just report that everything was truncated.
|
||||
let marker = format_truncation_marker(source, removed_units_for_source(source, s.len()));
|
||||
let marker = format_truncation_marker(
|
||||
policy,
|
||||
removed_units_for_source(policy, s.len(), total_chars),
|
||||
);
|
||||
return marker;
|
||||
}
|
||||
|
||||
|
|
@ -208,127 +202,32 @@ fn truncate_with_byte_estimate(s: &str, max_bytes: usize, source: TruncationSour
|
|||
}
|
||||
|
||||
let total_bytes = s.len();
|
||||
let removed_bytes = total_bytes.saturating_sub(max_bytes);
|
||||
let marker = format_truncation_marker(source, removed_units_for_source(source, removed_bytes));
|
||||
let marker_len = marker.len();
|
||||
|
||||
if marker_len >= max_bytes {
|
||||
let truncated_marker = truncate_on_boundary(&marker, max_bytes);
|
||||
return truncated_marker.to_string();
|
||||
}
|
||||
|
||||
let keep_budget = max_bytes - marker_len;
|
||||
let (left_budget, right_budget) = split_budget(keep_budget);
|
||||
let (left_budget, right_budget) = split_budget(max_bytes);
|
||||
let prefix_end = pick_prefix_end(s, left_budget);
|
||||
let mut suffix_start = pick_suffix_start(s, right_budget);
|
||||
if suffix_start < prefix_end {
|
||||
suffix_start = prefix_end;
|
||||
}
|
||||
|
||||
let mut out = assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker);
|
||||
let left_chars = s[..prefix_end].chars().count();
|
||||
let right_chars = s[suffix_start..].chars().count();
|
||||
let removed_chars = total_chars
|
||||
.saturating_sub(left_chars)
|
||||
.saturating_sub(right_chars);
|
||||
|
||||
if out.len() > max_bytes {
|
||||
let boundary = truncate_on_boundary(&out, max_bytes);
|
||||
out.truncate(boundary.len());
|
||||
}
|
||||
let marker = format_truncation_marker(
|
||||
policy,
|
||||
removed_units_for_source(policy, total_bytes.saturating_sub(max_bytes), removed_chars),
|
||||
);
|
||||
|
||||
out
|
||||
assemble_truncated_output(&s[..prefix_end], &s[suffix_start..], &marker)
|
||||
}
|
||||
|
||||
pub(crate) fn truncate_formatted_exec_output(
|
||||
content: &str,
|
||||
total_lines: usize,
|
||||
limit_bytes: usize,
|
||||
limit_lines: usize,
|
||||
) -> String {
|
||||
error_on_double_truncation(content);
|
||||
let head_lines: usize = limit_lines / 2;
|
||||
let tail_lines: usize = limit_lines - head_lines; // 128
|
||||
let head_bytes: usize = limit_bytes / 2;
|
||||
let segments: Vec<&str> = content.split_inclusive('\n').collect();
|
||||
let head_take = head_lines.min(segments.len());
|
||||
let tail_take = tail_lines.min(segments.len().saturating_sub(head_take));
|
||||
let omitted = segments.len().saturating_sub(head_take + tail_take);
|
||||
|
||||
let head_slice_end: usize = segments
|
||||
.iter()
|
||||
.take(head_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum();
|
||||
let tail_slice_start: usize = if tail_take == 0 {
|
||||
content.len()
|
||||
} else {
|
||||
content.len()
|
||||
- segments
|
||||
.iter()
|
||||
.rev()
|
||||
.take(tail_take)
|
||||
.map(|segment| segment.len())
|
||||
.sum::<usize>()
|
||||
};
|
||||
let head_slice = &content[..head_slice_end];
|
||||
let tail_slice = &content[tail_slice_start..];
|
||||
let truncated_by_bytes = content.len() > limit_bytes;
|
||||
// this is a bit wrong. We are counting metadata lines and not just shell output lines.
|
||||
let marker = if omitted > 0 {
|
||||
let marker_text = format_truncation_marker(
|
||||
TruncationSource::LineOmission { total_lines },
|
||||
u64::try_from(omitted).unwrap_or(u64::MAX),
|
||||
);
|
||||
Some(format!("\n{marker_text}\n\n"))
|
||||
} else if truncated_by_bytes {
|
||||
let removed_bytes =
|
||||
u64::try_from(content.len().saturating_sub(limit_bytes)).unwrap_or(u64::MAX);
|
||||
let marker_text =
|
||||
format_truncation_marker(TruncationSource::ByteLimit { limit_bytes }, removed_bytes);
|
||||
Some(format!("\n{marker_text}\n\n"))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let marker_len = marker.as_ref().map_or(0, String::len);
|
||||
let base_head_budget = head_bytes.min(limit_bytes);
|
||||
let head_budget = base_head_budget.min(limit_bytes.saturating_sub(marker_len));
|
||||
let head_part = take_bytes_at_char_boundary(head_slice, head_budget);
|
||||
let mut result = String::with_capacity(limit_bytes.min(content.len()));
|
||||
|
||||
result.push_str(head_part);
|
||||
if let Some(marker_text) = marker.as_ref() {
|
||||
result.push_str(marker_text);
|
||||
}
|
||||
|
||||
let remaining = limit_bytes.saturating_sub(result.len());
|
||||
if remaining == 0 {
|
||||
return result;
|
||||
}
|
||||
|
||||
let tail_part = take_last_bytes_at_char_boundary(tail_slice, remaining);
|
||||
result.push_str(tail_part);
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub enum TruncationSource {
|
||||
Policy(TruncationPolicy),
|
||||
LineOmission { total_lines: usize },
|
||||
ByteLimit { limit_bytes: usize },
|
||||
}
|
||||
|
||||
fn format_truncation_marker(source: TruncationSource, removed_count: u64) -> String {
|
||||
match source {
|
||||
TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
|
||||
format!("[…{removed_count} tokens truncated…]")
|
||||
}
|
||||
TruncationSource::Policy(TruncationPolicy::Bytes(_)) => {
|
||||
format!("[…{removed_count} bytes truncated…]")
|
||||
}
|
||||
TruncationSource::LineOmission { total_lines } => {
|
||||
format!("[... omitted {removed_count} of {total_lines} lines ...]")
|
||||
}
|
||||
TruncationSource::ByteLimit { limit_bytes } => {
|
||||
format!("[... removed {removed_count} bytes to fit {limit_bytes} byte limit ...]")
|
||||
}
|
||||
fn format_truncation_marker(policy: TruncationPolicy, removed_count: u64) -> String {
|
||||
match policy {
|
||||
TruncationPolicy::Tokens(_) => format!("…{removed_count} tokens truncated…"),
|
||||
TruncationPolicy::Bytes(_) => format!("…{removed_count} chars truncated…"),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -337,12 +236,14 @@ fn split_budget(budget: usize) -> (usize, usize) {
|
|||
(left, budget - left)
|
||||
}
|
||||
|
||||
fn removed_units_for_source(source: TruncationSource, removed_bytes: usize) -> u64 {
|
||||
match source {
|
||||
TruncationSource::Policy(TruncationPolicy::Tokens(_)) => {
|
||||
approx_tokens_from_byte_count(removed_bytes)
|
||||
}
|
||||
_ => u64::try_from(removed_bytes).unwrap_or(u64::MAX),
|
||||
fn removed_units_for_source(
|
||||
policy: TruncationPolicy,
|
||||
removed_bytes: usize,
|
||||
removed_chars: usize,
|
||||
) -> u64 {
|
||||
match policy {
|
||||
TruncationPolicy::Tokens(_) => approx_tokens_from_byte_count(removed_bytes),
|
||||
TruncationPolicy::Bytes(_) => u64::try_from(removed_chars).unwrap_or(u64::MAX),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -350,7 +251,6 @@ fn assemble_truncated_output(prefix: &str, suffix: &str, marker: &str) -> String
|
|||
let mut out = String::with_capacity(prefix.len() + marker.len() + suffix.len() + 1);
|
||||
out.push_str(prefix);
|
||||
out.push_str(marker);
|
||||
out.push('\n');
|
||||
out.push_str(suffix);
|
||||
out
|
||||
}
|
||||
|
|
@ -405,203 +305,130 @@ fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
|
|||
idx
|
||||
}
|
||||
|
||||
fn error_on_double_truncation(content: &str) {
|
||||
if content.contains("Total output lines:") && content.contains("omitted") {
|
||||
tracing::error!(
|
||||
"FunctionCallOutput content was already truncated before ContextManager::record_items; this would cause double truncation {content}"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::config::OPENAI_DEFAULT_MODEL;
|
||||
use crate::model_family::derive_default_model_family;
|
||||
use crate::model_family::find_family_for_model;
|
||||
|
||||
use super::TruncationPolicy;
|
||||
use super::TruncationSource;
|
||||
use super::approx_token_count;
|
||||
use super::formatted_truncate_text;
|
||||
use super::truncate_function_output_items_with_policy;
|
||||
use super::truncate_with_line_bytes_budget;
|
||||
use super::truncate_text;
|
||||
use super::truncate_with_token_budget;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use pretty_assertions::assert_eq;
|
||||
use regex_lite::Regex;
|
||||
|
||||
const MODEL_FORMAT_MAX_LINES: usize = 256;
|
||||
#[test]
|
||||
fn truncate_bytes_less_than_placeholder_returns_placeholder() {
|
||||
let content = "example output";
|
||||
|
||||
fn model_format_max_bytes() -> usize {
|
||||
find_family_for_model(OPENAI_DEFAULT_MODEL)
|
||||
.unwrap_or_else(|| derive_default_model_family(OPENAI_DEFAULT_MODEL))
|
||||
.truncation_policy
|
||||
.byte_budget()
|
||||
}
|
||||
|
||||
fn truncated_message_pattern(line: &str, total_lines: usize) -> String {
|
||||
let head_lines = MODEL_FORMAT_MAX_LINES / 2;
|
||||
let tail_lines = MODEL_FORMAT_MAX_LINES - head_lines;
|
||||
let head_take = head_lines.min(total_lines);
|
||||
let tail_take = tail_lines.min(total_lines.saturating_sub(head_take));
|
||||
let omitted = total_lines.saturating_sub(head_take + tail_take);
|
||||
let escaped_line = regex_lite::escape(line);
|
||||
if omitted == 0 {
|
||||
return format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} removed \d+ bytes to fit {max_bytes} byte limit \.{{3}}]\n\n.*)$",
|
||||
max_bytes = model_format_max_bytes(),
|
||||
);
|
||||
}
|
||||
format!(
|
||||
r"(?s)^Total output lines: {total_lines}\n\n(?P<body>{escaped_line}.*\n\[\.{{3}} omitted {omitted} of {total_lines} lines \.{{3}}]\n\n.*)$",
|
||||
)
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\n…13 chars truncated…t",
|
||||
formatted_truncate_text(content, TruncationPolicy::Bytes(1)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_returns_original_when_under_limit() {
|
||||
fn truncate_tokens_less_than_placeholder_returns_placeholder() {
|
||||
let content = "example output";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\nex…3 tokens truncated…ut",
|
||||
formatted_truncate_text(content, TruncationPolicy::Tokens(1)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_tokens_under_limit_returns_original() {
|
||||
let content = "example output";
|
||||
|
||||
assert_eq!(
|
||||
content,
|
||||
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_bytes_under_limit_returns_original() {
|
||||
let content = "example output";
|
||||
|
||||
assert_eq!(
|
||||
content,
|
||||
formatted_truncate_text(content, TruncationPolicy::Bytes(20)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_tokens_over_limit_returns_truncated() {
|
||||
let content = "this is an example of a long output that should be truncated";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\nthis is an…10 tokens truncated… truncated",
|
||||
formatted_truncate_text(content, TruncationPolicy::Tokens(5)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_bytes_over_limit_returns_truncated() {
|
||||
let content = "this is an example of a long output that should be truncated";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 1\n\nthis is an exam…30 chars truncated…ld be truncated",
|
||||
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_bytes_reports_original_line_count_when_truncated() {
|
||||
let content =
|
||||
"this is an example of a long output that should be truncated\nalso some other line";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 2\n\nthis is an exam…51 chars truncated…some other line",
|
||||
formatted_truncate_text(content, TruncationPolicy::Bytes(30)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_tokens_reports_original_line_count_when_truncated() {
|
||||
let content =
|
||||
"this is an example of a long output that should be truncated\nalso some other line";
|
||||
|
||||
assert_eq!(
|
||||
"Total output lines: 2\n\nthis is an example o…11 tokens truncated…also some other line",
|
||||
formatted_truncate_text(content, TruncationPolicy::Tokens(10)),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_with_token_budget_returns_original_when_under_limit() {
|
||||
let s = "short output";
|
||||
let limit = 100;
|
||||
let source = TruncationSource::Policy(TruncationPolicy::Tokens(limit));
|
||||
let (out, original) = truncate_with_token_budget(s, limit, source);
|
||||
let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(limit));
|
||||
assert_eq!(out, s);
|
||||
assert_eq!(original, None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_reports_truncation_at_zero_limit() {
|
||||
fn truncate_with_token_budget_reports_truncation_at_zero_limit() {
|
||||
let s = "abcdef";
|
||||
let source = TruncationSource::Policy(TruncationPolicy::Tokens(0));
|
||||
let (out, original) = truncate_with_token_budget(s, 0, source);
|
||||
assert_eq!(out, "[…2 tokens truncated…]");
|
||||
assert_eq!(original, Some(approx_token_count(s) as u64));
|
||||
let (out, original) = truncate_with_token_budget(s, TruncationPolicy::Tokens(0));
|
||||
assert_eq!(out, "…2 tokens truncated…");
|
||||
assert_eq!(original, Some(2));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_enforces_token_budget() {
|
||||
let s = "alpha beta gamma delta epsilon zeta eta theta iota kappa";
|
||||
let max_tokens = 12;
|
||||
let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
|
||||
let (out, original) = truncate_with_token_budget(s, max_tokens, source);
|
||||
assert!(out.contains("tokens truncated"));
|
||||
assert_eq!(original, Some(approx_token_count(s) as u64));
|
||||
assert!(out.len() < s.len(), "truncated output should be shorter");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn truncate_middle_handles_utf8_content() {
|
||||
fn truncate_middle_tokens_handles_utf8_content() {
|
||||
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
|
||||
let max_tokens = 8;
|
||||
let source = TruncationSource::Policy(TruncationPolicy::Tokens(max_tokens));
|
||||
let (out, tokens) = truncate_with_token_budget(s, max_tokens, source);
|
||||
|
||||
assert!(out.contains("tokens truncated"));
|
||||
assert!(!out.contains('\u{fffd}'));
|
||||
assert_eq!(tokens, Some(approx_token_count(s) as u64));
|
||||
assert!(out.len() < s.len(), "UTF-8 content should be shortened");
|
||||
let (out, tokens) = truncate_with_token_budget(s, TruncationPolicy::Tokens(8));
|
||||
assert_eq!(out, "😀😀😀😀…8 tokens truncated…");
|
||||
assert_eq!(tokens, Some(16));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_truncates_large_error() {
|
||||
let line = "very long execution error line that should trigger truncation\n";
|
||||
let large_error = line.repeat(2_500); // way beyond both byte and line limits
|
||||
|
||||
let truncated = truncate_with_line_bytes_budget(&large_error, model_format_max_bytes());
|
||||
|
||||
let total_lines = large_error.lines().count();
|
||||
let pattern = truncated_message_pattern(line, total_lines);
|
||||
let regex = Regex::new(&pattern).unwrap_or_else(|err| {
|
||||
panic!("failed to compile regex {pattern}: {err}");
|
||||
});
|
||||
let captures = regex
|
||||
.captures(&truncated)
|
||||
.unwrap_or_else(|| panic!("message failed to match pattern {pattern}: {truncated}"));
|
||||
let body = captures
|
||||
.name("body")
|
||||
.expect("missing body capture")
|
||||
.as_str();
|
||||
assert!(
|
||||
body.len() <= model_format_max_bytes(),
|
||||
"body exceeds byte limit: {} bytes",
|
||||
body.len()
|
||||
);
|
||||
assert_ne!(truncated, large_error);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_marks_byte_truncation_without_omitted_lines() {
|
||||
let max_bytes = model_format_max_bytes();
|
||||
let long_line = "a".repeat(max_bytes + 50);
|
||||
let truncated = truncate_with_line_bytes_budget(&long_line, max_bytes);
|
||||
|
||||
assert_ne!(truncated, long_line);
|
||||
let removed_bytes = long_line.len().saturating_sub(max_bytes);
|
||||
let marker_line =
|
||||
format!("[... removed {removed_bytes} bytes to fit {max_bytes} byte limit ...]");
|
||||
assert!(
|
||||
truncated.contains(&marker_line),
|
||||
"missing byte truncation marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("omitted"),
|
||||
"line omission marker should not appear when no lines were dropped: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_returns_original_when_within_limits() {
|
||||
let content = "example output\n".repeat(10);
|
||||
|
||||
assert_eq!(
|
||||
truncate_with_line_bytes_budget(&content, model_format_max_bytes()),
|
||||
content
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_reports_omitted_lines_and_keeps_head_and_tail() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 100;
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
|
||||
|
||||
let omitted = total_lines - MODEL_FORMAT_MAX_LINES;
|
||||
let expected_marker = format!("[... omitted {omitted} of {total_lines} lines ...]");
|
||||
|
||||
assert!(
|
||||
truncated.contains(&expected_marker),
|
||||
"missing omitted marker: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
truncated.contains("line-0\n"),
|
||||
"expected head line to remain: {truncated}"
|
||||
);
|
||||
|
||||
let last_line = format!("line-{}\n", total_lines - 1);
|
||||
assert!(
|
||||
truncated.contains(&last_line),
|
||||
"expected tail line to remain: {truncated}"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn format_exec_output_prefers_line_marker_when_both_limits_exceeded() {
|
||||
let total_lines = MODEL_FORMAT_MAX_LINES + 42;
|
||||
let long_line = "x".repeat(256);
|
||||
let content: String = (0..total_lines)
|
||||
.map(|idx| format!("line-{idx}-{long_line}\n"))
|
||||
.collect();
|
||||
|
||||
let truncated = truncate_with_line_bytes_budget(&content, model_format_max_bytes());
|
||||
|
||||
assert!(
|
||||
truncated.contains("[... omitted 42 of 298 lines ...]"),
|
||||
"expected omitted marker when line count exceeds limit: {truncated}"
|
||||
);
|
||||
assert!(
|
||||
!truncated.contains("byte limit"),
|
||||
"line omission marker should take precedence over byte marker: {truncated}"
|
||||
);
|
||||
fn truncate_middle_bytes_handles_utf8_content() {
|
||||
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
|
||||
let out = truncate_text(s, TruncationPolicy::Bytes(20));
|
||||
assert_eq!(out, "😀😀…31 chars truncated…");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ use crate::exec::SandboxType;
|
|||
use crate::exec::StreamOutput;
|
||||
use crate::exec::is_likely_sandbox_denied;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::truncate_text;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
use codex_utils_pty::ExecCommandSession;
|
||||
use codex_utils_pty::SpawnedPty;
|
||||
|
||||
|
|
@ -167,7 +167,7 @@ impl UnifiedExecSession {
|
|||
};
|
||||
|
||||
if is_likely_sandbox_denied(self.sandbox_type(), &exec_output) {
|
||||
let snippet = truncate_text(
|
||||
let snippet = formatted_truncate_text(
|
||||
&aggregated_text,
|
||||
TruncationPolicy::Tokens(UNIFIED_EXEC_OUTPUT_MAX_TOKENS),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
|
|||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::approx_token_count;
|
||||
use crate::truncate::truncate_text;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
|
||||
use super::ExecCommandRequest;
|
||||
use super::SessionEntry;
|
||||
|
|
@ -72,7 +72,7 @@ impl UnifiedExecSessionManager {
|
|||
let wall_time = Instant::now().saturating_duration_since(start);
|
||||
|
||||
let text = String::from_utf8_lossy(&collected).to_string();
|
||||
let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
|
||||
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
|
||||
let chunk_id = generate_chunk_id();
|
||||
let has_exited = session.has_exited();
|
||||
let stored_id = self
|
||||
|
|
@ -179,7 +179,7 @@ impl UnifiedExecSessionManager {
|
|||
let wall_time = Instant::now().saturating_duration_since(start);
|
||||
|
||||
let text = String::from_utf8_lossy(&collected).to_string();
|
||||
let output = truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
|
||||
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
|
||||
let original_token_count = approx_token_count(&text);
|
||||
let chunk_id = generate_chunk_id();
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use std::time::Duration;
|
|||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
|
||||
use crate::codex::TurnContext;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::tools::format_exec_output_str;
|
||||
|
||||
|
|
@ -20,7 +21,11 @@ fn format_duration_line(duration: Duration) -> String {
|
|||
format!("Duration: {duration_seconds:.4} seconds")
|
||||
}
|
||||
|
||||
fn format_user_shell_command_body(command: &str, exec_output: &ExecToolCallOutput) -> String {
|
||||
fn format_user_shell_command_body(
|
||||
command: &str,
|
||||
exec_output: &ExecToolCallOutput,
|
||||
turn_context: &TurnContext,
|
||||
) -> String {
|
||||
let mut sections = Vec::new();
|
||||
sections.push("<command>".to_string());
|
||||
sections.push(command.to_string());
|
||||
|
|
@ -29,25 +34,33 @@ fn format_user_shell_command_body(command: &str, exec_output: &ExecToolCallOutpu
|
|||
sections.push(format!("Exit code: {}", exec_output.exit_code));
|
||||
sections.push(format_duration_line(exec_output.duration));
|
||||
sections.push("Output:".to_string());
|
||||
sections.push(format_exec_output_str(exec_output));
|
||||
sections.push(format_exec_output_str(
|
||||
exec_output,
|
||||
turn_context.truncation_policy,
|
||||
));
|
||||
sections.push("</result>".to_string());
|
||||
sections.join("\n")
|
||||
}
|
||||
|
||||
pub fn format_user_shell_command_record(command: &str, exec_output: &ExecToolCallOutput) -> String {
|
||||
let body = format_user_shell_command_body(command, exec_output);
|
||||
pub fn format_user_shell_command_record(
|
||||
command: &str,
|
||||
exec_output: &ExecToolCallOutput,
|
||||
turn_context: &TurnContext,
|
||||
) -> String {
|
||||
let body = format_user_shell_command_body(command, exec_output, turn_context);
|
||||
format!("{USER_SHELL_COMMAND_OPEN}\n{body}\n{USER_SHELL_COMMAND_CLOSE}")
|
||||
}
|
||||
|
||||
pub fn user_shell_command_record_item(
|
||||
command: &str,
|
||||
exec_output: &ExecToolCallOutput,
|
||||
turn_context: &TurnContext,
|
||||
) -> ResponseItem {
|
||||
ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText {
|
||||
text: format_user_shell_command_record(command, exec_output),
|
||||
text: format_user_shell_command_record(command, exec_output, turn_context),
|
||||
}],
|
||||
}
|
||||
}
|
||||
|
|
@ -55,6 +68,7 @@ pub fn user_shell_command_record_item(
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::exec::StreamOutput;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
|
|
@ -76,7 +90,8 @@ mod tests {
|
|||
duration: Duration::from_secs(1),
|
||||
timed_out: false,
|
||||
};
|
||||
let item = user_shell_command_record_item("echo hi", &exec_output);
|
||||
let (_, turn_context) = make_session_and_context();
|
||||
let item = user_shell_command_record_item("echo hi", &exec_output, &turn_context);
|
||||
let ResponseItem::Message { content, .. } = item else {
|
||||
panic!("expected message");
|
||||
};
|
||||
|
|
@ -99,7 +114,8 @@ mod tests {
|
|||
duration: Duration::from_millis(120),
|
||||
timed_out: false,
|
||||
};
|
||||
let record = format_user_shell_command_record("false", &exec_output);
|
||||
let (_, turn_context) = make_session_and_context();
|
||||
let record = format_user_shell_command_record("false", &exec_output, &turn_context);
|
||||
assert_eq!(
|
||||
record,
|
||||
"<user_shell_command>\n<command>\nfalse\n</command>\n<result>\nExit code: 42\nDuration: 0.1200 seconds\nOutput:\ncombined output wins\n</result>\n</user_shell_command>"
|
||||
|
|
|
|||
|
|
@ -407,7 +407,6 @@ $"#;
|
|||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[test_case(ShellModelOutput::Shell)]
|
||||
#[test_case(ShellModelOutput::ShellCommand)]
|
||||
#[test_case(ShellModelOutput::LocalShell)]
|
||||
async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutput) -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
@ -417,6 +416,7 @@ async fn shell_output_reserializes_truncated_content(output_type: ShellModelOutp
|
|||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
config.tool_output_token_limit = Some(200);
|
||||
if matches!(output_type, ShellModelOutput::ShellCommand) {
|
||||
config.features.enable(Feature::ShellCommandTool);
|
||||
}
|
||||
|
|
@ -457,10 +457,7 @@ Output:
|
|||
4
|
||||
5
|
||||
6
|
||||
.*
|
||||
\[\.{3} omitted \d+ of 400 lines \.{3}\]
|
||||
|
||||
.*
|
||||
.*…45 tokens truncated….*
|
||||
396
|
||||
397
|
||||
398
|
||||
|
|
@ -858,7 +855,7 @@ async fn shell_command_output_is_not_truncated_under_10k_bytes() -> Result<()> {
|
|||
let expected_pattern = r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Output:
|
||||
1{5000}$"; // TODO: this is very wrong!!!
|
||||
1{10000}$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
|
|
@ -911,12 +908,8 @@ async fn shell_command_output_is_not_truncated_over_10k_bytes() -> Result<()> {
|
|||
|
||||
let expected_pattern = r"(?s)^Exit code: 0
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: 1
|
||||
Output:
|
||||
1*
|
||||
\[... removed 1 bytes to fit 10000 byte limit ...\]
|
||||
|
||||
$";
|
||||
1*…1 chars truncated…1*$";
|
||||
assert_regex_match(expected_pattern, output);
|
||||
|
||||
Ok(())
|
||||
|
|
|
|||
|
|
@ -101,17 +101,16 @@ async fn truncate_function_error_trims_respond_to_model() -> Result<()> {
|
|||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
let mut builder = test_codex().with_model("gpt-5.1").with_config(|config| {
|
||||
config.tool_output_token_limit = Some(100_000);
|
||||
});
|
||||
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
|
|
@ -120,13 +119,13 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
|||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 400; $i++) { Write-Output $i }"
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 400"],
|
||||
"command": ["/bin/sh", "-c", "seq 1 100000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
};
|
||||
|
|
@ -162,14 +161,169 @@ async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
|||
.context("function_call_output present for shell call")?;
|
||||
let output = output.replace("\r\n", "\n");
|
||||
|
||||
// Expect plain text (not JSON) with truncation markers and line elision.
|
||||
// Expect plain text (not JSON) containing the entire shell output.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
|
||||
assert_eq!(output.len(), 400094, "we should be almost 100k tokens");
|
||||
|
||||
assert!(
|
||||
!output.contains("tokens truncated"),
|
||||
"shell output should not contain tokens truncated marker: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
let mut builder = test_codex().with_model("gpt-5.1");
|
||||
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 100000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
};
|
||||
|
||||
// First response: model tells us to run the tool; second: complete the turn.
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock2 = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
|
||||
.await?;
|
||||
|
||||
// Inspect what we sent back to the model; it should contain a truncated
|
||||
// function_call_output for the shell call.
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for shell call")?;
|
||||
let output = output.replace("\r\n", "\n");
|
||||
|
||||
// Expect plain text (not JSON) containing the entire shell output.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
|
||||
assert_eq!(output.len(), 9976); // ~10k characters
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\n.*?…578898 chars truncated….*$"#;
|
||||
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Verifies that a standard tool call (shell) exceeding the model formatting
|
||||
// limits is truncated before being sent back to the model.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn tool_call_output_exceeds_limit_truncated_for_model() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
// Use a model that exposes the generic shell tool.
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5.1-codex").expect("gpt-5.1-codex is a model family");
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
let call_id = "shell-too-large";
|
||||
let args = if cfg!(windows) {
|
||||
serde_json::json!({
|
||||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 100000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 100000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
};
|
||||
|
||||
// First response: model tells us to run the tool; second: complete the turn.
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, "shell", &serde_json::to_string(&args)?),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock2 = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "done"),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy("trigger big shell output", SandboxPolicy::DangerFullAccess)
|
||||
.await?;
|
||||
|
||||
// Inspect what we sent back to the model; it should contain a truncated
|
||||
// function_call_output for the shell call.
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for shell call")?;
|
||||
let output = output.replace("\r\n", "\n");
|
||||
|
||||
// Expect plain text (not JSON) containing the entire shell output.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0
|
||||
Wall time: .* seconds
|
||||
Total output lines: 400
|
||||
Wall time: [0-9]+(?:\.[0-9]+)? seconds
|
||||
Total output lines: 100000
|
||||
Output:
|
||||
1
|
||||
2
|
||||
|
|
@ -177,15 +331,9 @@ Output:
|
|||
4
|
||||
5
|
||||
6
|
||||
.*
|
||||
\[\.{3} omitted 144 of 400 lines \.{3}\]
|
||||
|
||||
.*
|
||||
396
|
||||
397
|
||||
398
|
||||
399
|
||||
400
|
||||
.*…137224 tokens truncated.*
|
||||
99999
|
||||
100000
|
||||
$"#;
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
|
||||
|
|
@ -211,13 +359,13 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
|
|||
"command": [
|
||||
"powershell",
|
||||
"-Command",
|
||||
"for ($i=1; $i -le 2000; $i++) { Write-Output $i }"
|
||||
"for ($i=1; $i -le 10000; $i++) { Write-Output $i }"
|
||||
],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
} else {
|
||||
serde_json::json!({
|
||||
"command": ["/bin/sh", "-c", "seq 1 2000"],
|
||||
"command": ["/bin/sh", "-c", "seq 1 10000"],
|
||||
"timeout_ms": 5_000,
|
||||
})
|
||||
};
|
||||
|
|
@ -249,11 +397,11 @@ async fn tool_call_output_truncated_only_once() -> Result<()> {
|
|||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for shell call")?;
|
||||
|
||||
let total_line_headers = output.matches("Total output lines:").count();
|
||||
let truncation_markers = output.matches("tokens truncated").count();
|
||||
|
||||
assert_eq!(
|
||||
total_line_headers, 1,
|
||||
"shell output should carry only one truncation header: {output}"
|
||||
truncation_markers, 1,
|
||||
"shell output should carry only one truncation marker: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
|
|
@ -321,6 +469,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
|
|||
disabled_tools: None,
|
||||
},
|
||||
);
|
||||
config.tool_output_token_limit = Some(500);
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
|
|
@ -337,12 +486,6 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
|
|||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for rmcp call")?;
|
||||
|
||||
// Expect plain text with token-based truncation marker; the original JSON body
|
||||
// is truncated in the middle of the echo string.
|
||||
assert!(
|
||||
serde_json::from_str::<Value>(&output).is_err(),
|
||||
"expected truncated MCP output to be plain text"
|
||||
);
|
||||
assert!(
|
||||
!output.contains("Total output lines:"),
|
||||
"MCP output should not include line-based truncation header: {output}"
|
||||
|
|
@ -350,6 +493,7 @@ async fn mcp_tool_call_output_exceeds_limit_truncated_for_model() -> Result<()>
|
|||
|
||||
let truncated_pattern = r#"(?s)^\{"echo":\s*"ECHOING: long-message-with-newlines-.*tokens truncated.*long-message-with-newlines-.*$"#;
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
assert!(output.len() < 2500, "{}", output.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -501,7 +645,9 @@ async fn token_policy_marker_reports_tokens() -> Result<()> {
|
|||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
|
||||
assert_regex_match(r"\[\u{2026}127 tokens truncated\u{2026}]", &output);
|
||||
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5\\n.*?…\d+ tokens truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
|
||||
|
||||
assert_regex_match(pattern, &output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -552,14 +698,16 @@ async fn byte_policy_marker_reports_bytes() -> Result<()> {
|
|||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
|
||||
assert_regex_match(r"\[\u{2026}505 bytes truncated\u{2026}]", &output);
|
||||
let pattern = r#"(?s)^\{"output":"Total output lines: 150\\n\\n1\\n2\\n3\\n4\\n5.*?…\d+ chars truncated…7\\n138\\n139\\n140\\n141\\n142\\n143\\n144\\n145\\n146\\n147\\n148\\n149\\n150\\n","metadata":\{"exit_code":0,"duration_seconds":0\.0\}\}$"#;
|
||||
|
||||
assert_regex_match(pattern, &output);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// Overriding config with a large token budget should avoid truncation.
|
||||
// Shell tool output should remain intact when the config opts into a large token budget.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn large_budget_avoids_truncation() -> Result<()> {
|
||||
async fn shell_tool_output_not_truncated_with_custom_limit() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
|
@ -576,6 +724,7 @@ async fn large_budget_avoids_truncation() -> Result<()> {
|
|||
"command": ["/bin/sh", "-c", "seq 1 1000"],
|
||||
"timeout_ms": 5_000,
|
||||
});
|
||||
let expected_body: String = (1..=1000).map(|i| format!("{i}\n")).collect();
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
|
|
@ -607,6 +756,10 @@ async fn large_budget_avoids_truncation() -> Result<()> {
|
|||
.function_call_output_text(call_id)
|
||||
.context("shell output present")?;
|
||||
|
||||
assert!(
|
||||
output.ends_with(&expected_body),
|
||||
"expected entire shell output when budget increased: {output}"
|
||||
);
|
||||
assert!(
|
||||
!output.contains("truncated"),
|
||||
"output should remain untruncated with ample budget"
|
||||
|
|
@ -614,3 +767,101 @@ async fn large_budget_avoids_truncation() -> Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// MCP server output should also remain intact when the config increases the token limit.
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 1)]
|
||||
async fn mcp_tool_call_output_not_truncated_with_custom_limit() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = start_mock_server().await;
|
||||
|
||||
let call_id = "rmcp-untruncated";
|
||||
let server_name = "rmcp";
|
||||
let tool_name = format!("mcp__{server_name}__echo");
|
||||
let large_msg = "a".repeat(80_000);
|
||||
let args_json = serde_json::json!({ "message": large_msg });
|
||||
|
||||
mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_response_created("resp-1"),
|
||||
responses::ev_function_call(call_id, &tool_name, &args_json.to_string()),
|
||||
responses::ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let mock2 = mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
responses::ev_assistant_message("msg-1", "rmcp echo tool completed."),
|
||||
responses::ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let rmcp_test_server_bin = CargoBuild::new()
|
||||
.package("codex-rmcp-client")
|
||||
.bin("test_stdio_server")
|
||||
.run()?
|
||||
.path()
|
||||
.to_string_lossy()
|
||||
.into_owned();
|
||||
|
||||
let mut builder = test_codex().with_config(move |config| {
|
||||
config.features.enable(Feature::RmcpClient);
|
||||
config.tool_output_token_limit = Some(50_000);
|
||||
config.mcp_servers.insert(
|
||||
server_name.to_string(),
|
||||
codex_core::config::types::McpServerConfig {
|
||||
transport: codex_core::config::types::McpServerTransportConfig::Stdio {
|
||||
command: rmcp_test_server_bin,
|
||||
args: Vec::new(),
|
||||
env: None,
|
||||
env_vars: Vec::new(),
|
||||
cwd: None,
|
||||
},
|
||||
enabled: true,
|
||||
startup_timeout_sec: Some(std::time::Duration::from_secs(10)),
|
||||
tool_timeout_sec: None,
|
||||
enabled_tools: None,
|
||||
disabled_tools: None,
|
||||
},
|
||||
);
|
||||
});
|
||||
let fixture = builder.build(&server).await?;
|
||||
|
||||
fixture
|
||||
.submit_turn_with_policy(
|
||||
"call the rmcp echo tool with a very large message",
|
||||
SandboxPolicy::ReadOnly,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let output = mock2
|
||||
.single_request()
|
||||
.function_call_output_text(call_id)
|
||||
.context("function_call_output present for rmcp call")?;
|
||||
|
||||
let parsed: Value = serde_json::from_str(&output)?;
|
||||
assert_eq!(
|
||||
output.len(),
|
||||
80031,
|
||||
"parsed MCP output should retain its serialized length"
|
||||
);
|
||||
let expected_echo = format!("ECHOING: {large_msg}");
|
||||
let echo_str = parsed["echo"]
|
||||
.as_str()
|
||||
.context("echo field should be a string in rmcp echo output")?;
|
||||
assert_eq!(
|
||||
echo_str.len(),
|
||||
expected_echo.len(),
|
||||
"echo length should match"
|
||||
);
|
||||
assert_eq!(echo_str, expected_echo);
|
||||
assert!(
|
||||
!output.contains("truncated"),
|
||||
"output should not include truncation markers when limit is raised: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1586,8 +1586,7 @@ PY
|
|||
let large_output = outputs.get(call_id).expect("missing large output summary");
|
||||
|
||||
let output_text = large_output.output.replace("\r\n", "\n");
|
||||
let truncated_pattern = r#"(?s)^(token token \n){5,}.*\[\u{2026}\d+ tokens truncated\u{2026}]\n(token token \n){5,}"#;
|
||||
|
||||
let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated…(token token \n){5,}$";
|
||||
assert_regex_match(truncated_pattern, &output_text);
|
||||
|
||||
let original_tokens = large_output
|
||||
|
|
|
|||
|
|
@ -207,10 +207,16 @@ async fn user_shell_command_history_is_persisted_and_shared_with_model() -> anyh
|
|||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[cfg(not(target_os = "windows"))] // TODO: unignore on windows
|
||||
async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<()> {
|
||||
let server = responses::start_mock_server().await;
|
||||
let mut builder = core_test_support::test_codex::test_codex();
|
||||
let test = builder.build(&server).await?;
|
||||
let builder = core_test_support::test_codex::test_codex();
|
||||
let test = builder
|
||||
.with_config(|config| {
|
||||
config.tool_output_token_limit = Some(100);
|
||||
})
|
||||
.build(&server)
|
||||
.await?;
|
||||
|
||||
#[cfg(windows)]
|
||||
let command = r#"for ($i=1; $i -le 400; $i++) { Write-Output $i }"#.to_string();
|
||||
|
|
@ -249,10 +255,9 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(
|
|||
.expect("command message recorded in request");
|
||||
let command_message = command_message.replace("\r\n", "\n");
|
||||
|
||||
let head = (1..=128).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let tail = (273..=400).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let truncated_body =
|
||||
format!("Total output lines: 400\n\n{head}\n[... omitted 144 of 400 lines ...]\n\n{tail}");
|
||||
let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let truncated_body = format!("Total output lines: 400\n\n{head}…273 tokens truncated…{tail}");
|
||||
let escaped_command = escape(&command);
|
||||
let escaped_truncated_body = escape(&truncated_body);
|
||||
let expected_pattern = format!(
|
||||
|
|
@ -270,6 +275,7 @@ async fn user_shell_command_is_truncated_only_once() -> anyhow::Result<()> {
|
|||
let server = start_mock_server().await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.tool_output_token_limit = Some(100);
|
||||
config.model = "gpt-5.1-codex".to_string();
|
||||
config.model_family =
|
||||
find_family_for_model("gpt-5-codex").expect("gpt-5-codex is a model family");
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue