fix: don't truncate at new lines (#6907)

2025-11-19 09:05:48 -08:00 · 2025-11-19 09:05:48 -08:00 · d62cab9a06
commit d62cab9a06
parent d5dfba2509
5 changed files with 8 additions and 18 deletions
--- a/codex-rs/core/src/truncate.rs
+++ b/codex-rs/core/src/truncate.rs
@@ -282,22 +282,11 @@ fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
 }

 fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
-    if let Some(head) = s.get(..left_budget)
-        && let Some(i) = head.rfind('\n')
-    {
-        return i + 1;
-    }
    truncate_on_boundary(s, left_budget).len()
 }

 fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
    let start_tail = s.len().saturating_sub(right_budget);
-    if let Some(tail) = s.get(start_tail..)
-        && let Some(i) = tail.find('\n')
-    {
-        return start_tail + i + 1;
-    }
-
    let mut idx = start_tail.min(s.len());
    while idx < s.len() && !s.is_char_boundary(idx) {
        idx += 1;
@@ -420,7 +409,7 @@ mod tests {
    fn truncate_middle_tokens_handles_utf8_content() {
        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
        let (out, tokens) = truncate_with_token_budget(s, TruncationPolicy::Tokens(8));
-        assert_eq!(out, "😀😀😀😀…8 tokens truncated…");
+        assert_eq!(out, "😀😀😀😀…8 tokens truncated… line with text\n");
        assert_eq!(tokens, Some(16));
    }

@@ -428,7 +417,7 @@ mod tests {
    fn truncate_middle_bytes_handles_utf8_content() {
        let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
        let out = truncate_text(s, TruncationPolicy::Bytes(20));
-        assert_eq!(out, "😀😀…31 chars truncated…");
+        assert_eq!(out, "😀😀…21 chars truncated…with text\n");
    }

    #[test]
--- a/codex-rs/core/tests/suite/shell_serialization.rs
+++ b/codex-rs/core/tests/suite/shell_serialization.rs
@@ -457,7 +457,7 @@ Output:
 4
 5
 6
-.*…45 tokens truncated….*
+.*…46 tokens truncated….*
 396
 397
 398
--- a/codex-rs/core/tests/suite/truncation.rs
+++ b/codex-rs/core/tests/suite/truncation.rs
@@ -167,7 +167,7 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
        "expected truncated shell output to be plain text"
    );

-    assert_eq!(output.len(), 400094, "we should be almost 100k tokens");
+    assert_eq!(output.len(), 400097, "we should be almost 100k tokens");

    assert!(
        !output.contains("tokens truncated"),
@@ -245,7 +245,7 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
    );

    assert_eq!(output.len(), 9976); // ~10k characters
-    let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\n.*?…578898 chars truncated….*$"#;
+    let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\nOutput:\n.*?…\d+ chars truncated….*$"#;

    assert_regex_match(truncated_pattern, &output);

--- a/codex-rs/core/tests/suite/unified_exec.rs
+++ b/codex-rs/core/tests/suite/unified_exec.rs
@@ -1586,7 +1586,7 @@ PY
    let large_output = outputs.get(call_id).expect("missing large output summary");

    let output_text = large_output.output.replace("\r\n", "\n");
-    let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated…(token token \n){5,}$";
+    let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated….*(token token \n){5,}$";
    assert_regex_match(truncated_pattern, &output_text);

    let original_tokens = large_output
--- a/codex-rs/core/tests/suite/user_shell_cmd.rs
+++ b/codex-rs/core/tests/suite/user_shell_cmd.rs
@@ -257,7 +257,8 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(

    let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
    let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
-    let truncated_body = format!("Total output lines: 400\n\n{head}…273 tokens truncated…{tail}");
+    let truncated_body =
+        format!("Total output lines: 400\n\n{head}70…273 tokens truncated…351\n{tail}");
    let escaped_command = escape(&command);
    let escaped_truncated_body = escape(&truncated_body);
    let expected_pattern = format!(