fix: don't truncate at new lines (#6907)
This commit is contained in:
parent
d5dfba2509
commit
d62cab9a06
5 changed files with 8 additions and 18 deletions
|
|
@ -282,22 +282,11 @@ fn truncate_on_boundary(input: &str, max_len: usize) -> &str {
|
|||
}
|
||||
|
||||
fn pick_prefix_end(s: &str, left_budget: usize) -> usize {
|
||||
if let Some(head) = s.get(..left_budget)
|
||||
&& let Some(i) = head.rfind('\n')
|
||||
{
|
||||
return i + 1;
|
||||
}
|
||||
truncate_on_boundary(s, left_budget).len()
|
||||
}
|
||||
|
||||
fn pick_suffix_start(s: &str, right_budget: usize) -> usize {
|
||||
let start_tail = s.len().saturating_sub(right_budget);
|
||||
if let Some(tail) = s.get(start_tail..)
|
||||
&& let Some(i) = tail.find('\n')
|
||||
{
|
||||
return start_tail + i + 1;
|
||||
}
|
||||
|
||||
let mut idx = start_tail.min(s.len());
|
||||
while idx < s.len() && !s.is_char_boundary(idx) {
|
||||
idx += 1;
|
||||
|
|
@ -420,7 +409,7 @@ mod tests {
|
|||
fn truncate_middle_tokens_handles_utf8_content() {
|
||||
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
|
||||
let (out, tokens) = truncate_with_token_budget(s, TruncationPolicy::Tokens(8));
|
||||
assert_eq!(out, "😀😀😀😀…8 tokens truncated…");
|
||||
assert_eq!(out, "😀😀😀😀…8 tokens truncated… line with text\n");
|
||||
assert_eq!(tokens, Some(16));
|
||||
}
|
||||
|
||||
|
|
@ -428,7 +417,7 @@ mod tests {
|
|||
fn truncate_middle_bytes_handles_utf8_content() {
|
||||
let s = "😀😀😀😀😀😀😀😀😀😀\nsecond line with text\n";
|
||||
let out = truncate_text(s, TruncationPolicy::Bytes(20));
|
||||
assert_eq!(out, "😀😀…31 chars truncated…");
|
||||
assert_eq!(out, "😀😀…21 chars truncated…with text\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
|
|
|||
|
|
@ -457,7 +457,7 @@ Output:
|
|||
4
|
||||
5
|
||||
6
|
||||
.*…45 tokens truncated….*
|
||||
.*…46 tokens truncated….*
|
||||
396
|
||||
397
|
||||
398
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ async fn tool_call_output_configured_limit_chars_type() -> Result<()> {
|
|||
"expected truncated shell output to be plain text"
|
||||
);
|
||||
|
||||
assert_eq!(output.len(), 400094, "we should be almost 100k tokens");
|
||||
assert_eq!(output.len(), 400097, "we should be almost 100k tokens");
|
||||
|
||||
assert!(
|
||||
!output.contains("tokens truncated"),
|
||||
|
|
@ -245,7 +245,7 @@ async fn tool_call_output_exceeds_limit_truncated_chars_limit() -> Result<()> {
|
|||
);
|
||||
|
||||
assert_eq!(output.len(), 9976); // ~10k characters
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\n.*?…578898 chars truncated….*$"#;
|
||||
let truncated_pattern = r#"(?s)^Exit code: 0\nWall time: 0 seconds\nTotal output lines: 100000\nOutput:\n.*?…\d+ chars truncated….*$"#;
|
||||
|
||||
assert_regex_match(truncated_pattern, &output);
|
||||
|
||||
|
|
|
|||
|
|
@ -1586,7 +1586,7 @@ PY
|
|||
let large_output = outputs.get(call_id).expect("missing large output summary");
|
||||
|
||||
let output_text = large_output.output.replace("\r\n", "\n");
|
||||
let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated…(token token \n){5,}$";
|
||||
let truncated_pattern = r"(?s)^Total output lines: \d+\n\n(token token \n){5,}.*…\d+ tokens truncated….*(token token \n){5,}$";
|
||||
assert_regex_match(truncated_pattern, &output_text);
|
||||
|
||||
let original_tokens = large_output
|
||||
|
|
|
|||
|
|
@ -257,7 +257,8 @@ async fn user_shell_command_output_is_truncated_in_history() -> anyhow::Result<(
|
|||
|
||||
let head = (1..=69).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let tail = (352..=400).map(|i| format!("{i}\n")).collect::<String>();
|
||||
let truncated_body = format!("Total output lines: 400\n\n{head}…273 tokens truncated…{tail}");
|
||||
let truncated_body =
|
||||
format!("Total output lines: 400\n\n{head}70…273 tokens truncated…351\n{tail}");
|
||||
let escaped_command = escape(&command);
|
||||
let escaped_truncated_body = escape(&truncated_body);
|
||||
let expected_pattern = format!(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue