From a9ae43621b6d583d8d0ff9beeee53484a7a1f38b Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Mon, 9 Mar 2026 23:13:48 -0600 Subject: [PATCH] Move exec command truncation into ExecCommandToolOutput (#14169) Summary - relocate truncation logic for exec command output into the new `ExecCommandToolOutput` response helper instead of centralized handler code - update all affected tools and unified exec handling to use the new response item structure and eliminate `Function(FunctionToolOutput)` responses - adjust context, registry, and handler interfaces to align with the new response semantics and error fields Testing - Not run (not requested) --- codex-rs/core/src/tools/context.rs | 122 ++++++++++++++++++ .../core/src/tools/handlers/unified_exec.rs | 50 ++----- codex-rs/core/src/unified_exec/mod.rs | 34 ++--- .../core/src/unified_exec/process_manager.rs | 23 ++-- 4 files changed, 154 insertions(+), 75 deletions(-) diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index 41d11e6b3..a3521c466 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -3,7 +3,10 @@ use crate::codex::TurnContext; use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES; use crate::tools::TELEMETRY_PREVIEW_MAX_LINES; use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE; +use crate::truncate::TruncationPolicy; +use crate::truncate::formatted_truncate_text; use crate::turn_diff_tracker::TurnDiffTracker; +use crate::unified_exec::resolve_max_tokens; use codex_protocol::mcp::CallToolResult; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; @@ -14,6 +17,7 @@ use codex_protocol::models::function_call_output_content_items_to_text; use codex_utils_string::take_bytes_at_char_boundary; use std::borrow::Cow; use std::sync::Arc; +use std::time::Duration; use tokio::sync::Mutex; pub type SharedTurnDiffTracker = Arc>; @@ -116,6 +120,10 @@ impl FunctionToolOutput { success, } } + + pub fn into_text(self) -> String { + function_call_output_content_items_to_text(&self.body).unwrap_or_default() + } } impl ToolOutput for FunctionToolOutput { @@ -135,6 +143,77 @@ impl ToolOutput for FunctionToolOutput { } } +#[derive(Debug, Clone, PartialEq)] +pub struct ExecCommandToolOutput { + pub event_call_id: String, + pub chunk_id: String, + pub wall_time: Duration, + /// Raw bytes returned for this unified exec call before any truncation. + pub raw_output: Vec, + pub max_output_tokens: Option, + pub process_id: Option, + pub exit_code: Option, + pub original_token_count: Option, + pub session_command: Option>, +} + +impl ToolOutput for ExecCommandToolOutput { + fn log_preview(&self) -> String { + telemetry_preview(&self.response_text()) + } + + fn success_for_logging(&self) -> bool { + true + } + + fn into_response(self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { + function_tool_response( + call_id, + payload, + vec![FunctionCallOutputContentItem::InputText { + text: self.response_text(), + }], + Some(true), + ) + } +} + +impl ExecCommandToolOutput { + pub(crate) fn truncated_output(&self) -> String { + let text = String::from_utf8_lossy(&self.raw_output).to_string(); + let max_tokens = resolve_max_tokens(self.max_output_tokens); + formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens)) + } + + fn response_text(&self) -> String { + let mut sections = Vec::new(); + + if !self.chunk_id.is_empty() { + sections.push(format!("Chunk ID: {}", self.chunk_id)); + } + + let wall_time_seconds = self.wall_time.as_secs_f64(); + sections.push(format!("Wall time: {wall_time_seconds:.4} seconds")); + + if let Some(exit_code) = self.exit_code { + sections.push(format!("Process exited with code {exit_code}")); + } + + if let Some(process_id) = &self.process_id { + sections.push(format!("Process running with session ID {process_id}")); + } + + if let Some(original_token_count) = self.original_token_count { + sections.push(format!("Original token count: {original_token_count}")); + } + + sections.push("Output:".to_string()); + sections.push(self.truncated_output()); + + sections.join("\n") + } +} + fn function_tool_response( call_id: &str, payload: &ToolPayload, @@ -204,6 +283,7 @@ fn telemetry_preview(content: &str) -> String { #[cfg(test)] mod tests { use super::*; + use core_test_support::assert_regex_match; use pretty_assertions::assert_eq; #[test] @@ -336,4 +416,46 @@ mod tests { assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1); assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE)); } + + #[test] + fn exec_command_tool_output_formats_truncated_response() { + let payload = ToolPayload::Function { + arguments: "{}".to_string(), + }; + let response = ExecCommandToolOutput { + event_call_id: "call-42".to_string(), + chunk_id: "abc123".to_string(), + wall_time: std::time::Duration::from_millis(1250), + raw_output: b"token one token two token three token four token five".to_vec(), + max_output_tokens: Some(4), + process_id: None, + exit_code: Some(0), + original_token_count: Some(10), + session_command: None, + } + .into_response("call-42", &payload); + + match response { + ResponseInputItem::FunctionCallOutput { call_id, output } => { + assert_eq!(call_id, "call-42"); + assert_eq!(output.success, Some(true)); + let text = output + .body + .to_text() + .expect("exec output should serialize as text"); + assert_regex_match( + r#"(?sx) + ^Chunk\ ID:\ abc123 + \nWall\ time:\ \d+\.\d{4}\ seconds + \nProcess\ exited\ with\ code\ 0 + \nOriginal\ token\ count:\ 10 + \nOutput: + \n.*tokens\ truncated.* + $"#, + &text, + ); + } + other => panic!("expected FunctionCallOutput, got {other:?}"), + } + } } diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index 721fd30d1..4eb7125e9 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -7,7 +7,7 @@ use crate::sandboxing::SandboxPermissions; use crate::shell::Shell; use crate::shell::get_shell_by_model_provided_path; use crate::skills::maybe_emit_implicit_skill_invocation; -use crate::tools::context::FunctionToolOutput; +use crate::tools::context::ExecCommandToolOutput; use crate::tools::context::ToolInvocation; use crate::tools::context::ToolPayload; use crate::tools::handlers::apply_granted_turn_permissions; @@ -21,7 +21,6 @@ use crate::tools::registry::ToolKind; use crate::unified_exec::ExecCommandRequest; use crate::unified_exec::UnifiedExecContext; use crate::unified_exec::UnifiedExecProcessManager; -use crate::unified_exec::UnifiedExecResponse; use crate::unified_exec::WriteStdinRequest; use async_trait::async_trait; use codex_protocol::models::PermissionProfile; @@ -82,7 +81,7 @@ fn default_tty() -> bool { #[async_trait] impl ToolHandler for UnifiedExecHandler { - type Output = FunctionToolOutput; + type Output = ExecCommandToolOutput; fn kind(&self) -> ToolKind { ToolKind::Function @@ -230,7 +229,17 @@ impl ToolHandler for UnifiedExecHandler { .await? { manager.release_process_id(&process_id).await; - return Ok(output); + return Ok(ExecCommandToolOutput { + event_call_id: String::new(), + chunk_id: String::new(), + wall_time: std::time::Duration::ZERO, + raw_output: output.into_text().into_bytes(), + max_output_tokens: None, + process_id: None, + exit_code: None, + original_token_count: None, + session_command: None, + }); } manager @@ -290,9 +299,7 @@ impl ToolHandler for UnifiedExecHandler { } }; - let content = format_response(&response); - - Ok(FunctionToolOutput::from_text(content, Some(true))) + Ok(response) } } @@ -321,35 +328,6 @@ pub(crate) fn get_command( Ok(shell.derive_exec_args(&args.cmd, use_login_shell)) } -fn format_response(response: &UnifiedExecResponse) -> String { - let mut sections = Vec::new(); - - if !response.chunk_id.is_empty() { - sections.push(format!("Chunk ID: {}", response.chunk_id)); - } - - let wall_time_seconds = response.wall_time.as_secs_f64(); - sections.push(format!("Wall time: {wall_time_seconds:.4} seconds")); - - if let Some(exit_code) = response.exit_code { - sections.push(format!("Process exited with code {exit_code}")); - } - - if let Some(process_id) = &response.process_id { - // Training still uses "session ID". - sections.push(format!("Process running with session ID {process_id}")); - } - - if let Some(original_token_count) = response.original_token_count { - sections.push(format!("Original token count: {original_token_count}")); - } - - sections.push("Output:".to_string()); - sections.push(response.output.clone()); - - sections.join("\n") -} - #[cfg(test)] mod tests { use super::*; diff --git a/codex-rs/core/src/unified_exec/mod.rs b/codex-rs/core/src/unified_exec/mod.rs index b7d349e17..cd809d587 100644 --- a/codex-rs/core/src/unified_exec/mod.rs +++ b/codex-rs/core/src/unified_exec/mod.rs @@ -26,7 +26,6 @@ use std::collections::HashSet; use std::path::PathBuf; use std::sync::Arc; use std::sync::Weak; -use std::time::Duration; use codex_network_proxy::NetworkProxy; use codex_protocol::models::PermissionProfile; @@ -108,20 +107,6 @@ pub(crate) struct WriteStdinRequest<'a> { pub max_output_tokens: Option, } -#[derive(Debug, Clone, PartialEq)] -pub(crate) struct UnifiedExecResponse { - pub event_call_id: String, - pub chunk_id: String, - pub wall_time: Duration, - pub output: String, - /// Raw bytes returned for this unified exec call before any truncation. - pub raw_output: Vec, - pub process_id: Option, - pub exit_code: Option, - pub original_token_count: Option, - pub session_command: Option>, -} - #[derive(Default)] pub(crate) struct ProcessStore { processes: HashMap, @@ -192,6 +177,7 @@ mod tests { use crate::codex::make_session_and_context; use crate::protocol::AskForApproval; use crate::protocol::SandboxPolicy; + use crate::tools::context::ExecCommandToolOutput; use crate::unified_exec::ExecCommandRequest; use crate::unified_exec::WriteStdinRequest; use core_test_support::skip_if_sandbox; @@ -218,7 +204,7 @@ mod tests { turn: &Arc, cmd: &str, yield_time_ms: u64, - ) -> Result { + ) -> Result { let context = UnifiedExecContext::new(Arc::clone(session), Arc::clone(turn), "call".to_string()); let process_id = session @@ -255,7 +241,7 @@ mod tests { process_id: &str, input: &str, yield_time_ms: u64, - ) -> Result { + ) -> Result { session .services .unified_exec_manager @@ -330,7 +316,7 @@ mod tests { ) .await?; assert!( - out_2.output.contains("codex"), + out_2.truncated_output().contains("codex"), "expected environment variable output" ); @@ -366,7 +352,7 @@ mod tests { "short command should not report a process id if it exits quickly" ); assert!( - !out_2.output.contains("codex"), + !out_2.truncated_output().contains("codex"), "short command should run in a fresh shell" ); @@ -382,7 +368,7 @@ mod tests { ) .await?; assert!( - out_3.output.contains("codex"), + out_3.truncated_output().contains("codex"), "session should preserve state" ); @@ -420,7 +406,7 @@ mod tests { ) .await?; assert!( - !out_2.output.contains(TEST_VAR_VALUE), + !out_2.truncated_output().contains(TEST_VAR_VALUE), "timeout too short should yield incomplete output" ); @@ -429,7 +415,7 @@ mod tests { let out_3 = write_stdin(&session, process_id, "", 100).await?; assert!( - out_3.output.contains(TEST_VAR_VALUE), + out_3.truncated_output().contains(TEST_VAR_VALUE), "subsequent poll should retrieve output" ); @@ -444,7 +430,7 @@ mod tests { let result = exec_command(&session, &turn, "echo codex", 120_000).await?; assert!(result.process_id.is_some()); - assert!(result.output.contains("codex")); + assert!(result.truncated_output().contains("codex")); Ok(()) } @@ -459,7 +445,7 @@ mod tests { result.process_id.is_some(), "completed command should report a process id" ); - assert!(result.output.contains("codex")); + assert!(result.truncated_output().contains("codex")); assert!( session diff --git a/codex-rs/core/src/unified_exec/process_manager.rs b/codex-rs/core/src/unified_exec/process_manager.rs index 24214ae66..bbe561cfb 100644 --- a/codex-rs/core/src/unified_exec/process_manager.rs +++ b/codex-rs/core/src/unified_exec/process_manager.rs @@ -16,6 +16,7 @@ use crate::exec_env::create_env; use crate::exec_policy::ExecApprovalRequest; use crate::protocol::ExecCommandSource; use crate::sandboxing::ExecRequest; +use crate::tools::context::ExecCommandToolOutput; use crate::tools::events::ToolEmitter; use crate::tools::events::ToolEventCtx; use crate::tools::events::ToolEventStage; @@ -25,9 +26,7 @@ use crate::tools::orchestrator::ToolOrchestrator; use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest; use crate::tools::runtimes::unified_exec::UnifiedExecRuntime; use crate::tools::sandboxing::ToolCtx; -use crate::truncate::TruncationPolicy; use crate::truncate::approx_token_count; -use crate::truncate::formatted_truncate_text; use crate::unified_exec::ExecCommandRequest; use crate::unified_exec::MAX_UNIFIED_EXEC_PROCESSES; use crate::unified_exec::MAX_YIELD_TIME_MS; @@ -38,7 +37,6 @@ use crate::unified_exec::ProcessStore; use crate::unified_exec::UnifiedExecContext; use crate::unified_exec::UnifiedExecError; use crate::unified_exec::UnifiedExecProcessManager; -use crate::unified_exec::UnifiedExecResponse; use crate::unified_exec::WARNING_UNIFIED_EXEC_PROCESSES; use crate::unified_exec::WriteStdinRequest; use crate::unified_exec::async_watcher::emit_exec_end_for_unified_exec; @@ -51,7 +49,6 @@ use crate::unified_exec::process::OutputBuffer; use crate::unified_exec::process::OutputHandles; use crate::unified_exec::process::SpawnLifecycleHandle; use crate::unified_exec::process::UnifiedExecProcess; -use crate::unified_exec::resolve_max_tokens; const UNIFIED_EXEC_ENV: [(&str, &str); 10] = [ ("NO_COLOR", "1"), @@ -159,7 +156,7 @@ impl UnifiedExecProcessManager { &self, request: ExecCommandRequest, context: &UnifiedExecContext, - ) -> Result { + ) -> Result { let cwd = request .workdir .clone() @@ -194,7 +191,6 @@ impl UnifiedExecProcessManager { emitter.emit(event_ctx, ToolEventStage::Begin).await; start_streaming_output(&process, context, Arc::clone(&transcript)); - let max_tokens = resolve_max_tokens(request.max_output_tokens); let yield_time_ms = clamp_yield_time(request.yield_time_ms); let start = Instant::now(); @@ -221,7 +217,6 @@ impl UnifiedExecProcessManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let exit_code = process.exit_code(); let has_exited = process.has_exited() || exit_code.is_some(); let chunk_id = generate_chunk_id(); @@ -240,7 +235,7 @@ impl UnifiedExecProcessManager { cwd.clone(), Some(process_id), Arc::clone(&transcript), - output.clone(), + text.clone(), exit, wall_time, ) @@ -276,12 +271,12 @@ impl UnifiedExecProcessManager { }; let original_token_count = approx_token_count(&text); - let response = UnifiedExecResponse { + let response = ExecCommandToolOutput { event_call_id: context.call_id.clone(), chunk_id, wall_time, - output, raw_output: collected, + max_output_tokens: request.max_output_tokens, process_id: if has_exited { None } else { @@ -298,7 +293,7 @@ impl UnifiedExecProcessManager { pub(crate) async fn write_stdin( &self, request: WriteStdinRequest<'_>, - ) -> Result { + ) -> Result { let process_id = request.process_id.to_string(); let PreparedProcessHandles { @@ -324,7 +319,6 @@ impl UnifiedExecProcessManager { tokio::time::sleep(Duration::from_millis(100)).await; } - let max_tokens = resolve_max_tokens(request.max_output_tokens); let yield_time_ms = { // Empty polls use configurable background timeout bounds. Non-empty // writes keep a fixed max cap so interactive stdin remains responsive. @@ -349,7 +343,6 @@ impl UnifiedExecProcessManager { let wall_time = Instant::now().saturating_duration_since(start); let text = String::from_utf8_lossy(&collected).to_string(); - let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens)); let original_token_count = approx_token_count(&text); let chunk_id = generate_chunk_id(); @@ -375,12 +368,12 @@ impl UnifiedExecProcessManager { } }; - let response = UnifiedExecResponse { + let response = ExecCommandToolOutput { event_call_id, chunk_id, wall_time, - output, raw_output: collected, + max_output_tokens: request.max_output_tokens, process_id, exit_code, original_token_count: Some(original_token_count),