Move exec command truncation into ExecCommandToolOutput (#14169)

Summary
- relocate truncation logic for exec command output into the new
`ExecCommandToolOutput` response helper instead of centralized handler
code
- update all affected tools and unified exec handling to use the new
response item structure and eliminate `Function(FunctionToolOutput)`
responses
- adjust context, registry, and handler interfaces to align with the new
response semantics and error fields

Testing
- Not run (not requested)
This commit is contained in:
pakrym-oai 2026-03-09 23:13:48 -06:00 committed by GitHub
parent 0c33af7746
commit a9ae43621b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 154 additions and 75 deletions

View file

@ -3,7 +3,10 @@ use crate::codex::TurnContext;
use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES;
use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use crate::turn_diff_tracker::TurnDiffTracker;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::mcp::CallToolResult;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
@ -14,6 +17,7 @@ use codex_protocol::models::function_call_output_content_items_to_text;
use codex_utils_string::take_bytes_at_char_boundary;
use std::borrow::Cow;
use std::sync::Arc;
use std::time::Duration;
use tokio::sync::Mutex;
pub type SharedTurnDiffTracker = Arc<Mutex<TurnDiffTracker>>;
@ -116,6 +120,10 @@ impl FunctionToolOutput {
success,
}
}
pub fn into_text(self) -> String {
function_call_output_content_items_to_text(&self.body).unwrap_or_default()
}
}
impl ToolOutput for FunctionToolOutput {
@ -135,6 +143,77 @@ impl ToolOutput for FunctionToolOutput {
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct ExecCommandToolOutput {
pub event_call_id: String,
pub chunk_id: String,
pub wall_time: Duration,
/// Raw bytes returned for this unified exec call before any truncation.
pub raw_output: Vec<u8>,
pub max_output_tokens: Option<usize>,
pub process_id: Option<String>,
pub exit_code: Option<i32>,
pub original_token_count: Option<usize>,
pub session_command: Option<Vec<String>>,
}
impl ToolOutput for ExecCommandToolOutput {
fn log_preview(&self) -> String {
telemetry_preview(&self.response_text())
}
fn success_for_logging(&self) -> bool {
true
}
fn into_response(self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem {
function_tool_response(
call_id,
payload,
vec![FunctionCallOutputContentItem::InputText {
text: self.response_text(),
}],
Some(true),
)
}
}
impl ExecCommandToolOutput {
pub(crate) fn truncated_output(&self) -> String {
let text = String::from_utf8_lossy(&self.raw_output).to_string();
let max_tokens = resolve_max_tokens(self.max_output_tokens);
formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens))
}
fn response_text(&self) -> String {
let mut sections = Vec::new();
if !self.chunk_id.is_empty() {
sections.push(format!("Chunk ID: {}", self.chunk_id));
}
let wall_time_seconds = self.wall_time.as_secs_f64();
sections.push(format!("Wall time: {wall_time_seconds:.4} seconds"));
if let Some(exit_code) = self.exit_code {
sections.push(format!("Process exited with code {exit_code}"));
}
if let Some(process_id) = &self.process_id {
sections.push(format!("Process running with session ID {process_id}"));
}
if let Some(original_token_count) = self.original_token_count {
sections.push(format!("Original token count: {original_token_count}"));
}
sections.push("Output:".to_string());
sections.push(self.truncated_output());
sections.join("\n")
}
}
fn function_tool_response(
call_id: &str,
payload: &ToolPayload,
@ -204,6 +283,7 @@ fn telemetry_preview(content: &str) -> String {
#[cfg(test)]
mod tests {
use super::*;
use core_test_support::assert_regex_match;
use pretty_assertions::assert_eq;
#[test]
@ -336,4 +416,46 @@ mod tests {
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
}
#[test]
fn exec_command_tool_output_formats_truncated_response() {
let payload = ToolPayload::Function {
arguments: "{}".to_string(),
};
let response = ExecCommandToolOutput {
event_call_id: "call-42".to_string(),
chunk_id: "abc123".to_string(),
wall_time: std::time::Duration::from_millis(1250),
raw_output: b"token one token two token three token four token five".to_vec(),
max_output_tokens: Some(4),
process_id: None,
exit_code: Some(0),
original_token_count: Some(10),
session_command: None,
}
.into_response("call-42", &payload);
match response {
ResponseInputItem::FunctionCallOutput { call_id, output } => {
assert_eq!(call_id, "call-42");
assert_eq!(output.success, Some(true));
let text = output
.body
.to_text()
.expect("exec output should serialize as text");
assert_regex_match(
r#"(?sx)
^Chunk\ ID:\ abc123
\nWall\ time:\ \d+\.\d{4}\ seconds
\nProcess\ exited\ with\ code\ 0
\nOriginal\ token\ count:\ 10
\nOutput:
\n.*tokens\ truncated.*
$"#,
&text,
);
}
other => panic!("expected FunctionCallOutput, got {other:?}"),
}
}
}

View file

@ -7,7 +7,7 @@ use crate::sandboxing::SandboxPermissions;
use crate::shell::Shell;
use crate::shell::get_shell_by_model_provided_path;
use crate::skills::maybe_emit_implicit_skill_invocation;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::ExecCommandToolOutput;
use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::handlers::apply_granted_turn_permissions;
@ -21,7 +21,6 @@ use crate::tools::registry::ToolKind;
use crate::unified_exec::ExecCommandRequest;
use crate::unified_exec::UnifiedExecContext;
use crate::unified_exec::UnifiedExecProcessManager;
use crate::unified_exec::UnifiedExecResponse;
use crate::unified_exec::WriteStdinRequest;
use async_trait::async_trait;
use codex_protocol::models::PermissionProfile;
@ -82,7 +81,7 @@ fn default_tty() -> bool {
#[async_trait]
impl ToolHandler for UnifiedExecHandler {
type Output = FunctionToolOutput;
type Output = ExecCommandToolOutput;
fn kind(&self) -> ToolKind {
ToolKind::Function
@ -230,7 +229,17 @@ impl ToolHandler for UnifiedExecHandler {
.await?
{
manager.release_process_id(&process_id).await;
return Ok(output);
return Ok(ExecCommandToolOutput {
event_call_id: String::new(),
chunk_id: String::new(),
wall_time: std::time::Duration::ZERO,
raw_output: output.into_text().into_bytes(),
max_output_tokens: None,
process_id: None,
exit_code: None,
original_token_count: None,
session_command: None,
});
}
manager
@ -290,9 +299,7 @@ impl ToolHandler for UnifiedExecHandler {
}
};
let content = format_response(&response);
Ok(FunctionToolOutput::from_text(content, Some(true)))
Ok(response)
}
}
@ -321,35 +328,6 @@ pub(crate) fn get_command(
Ok(shell.derive_exec_args(&args.cmd, use_login_shell))
}
fn format_response(response: &UnifiedExecResponse) -> String {
let mut sections = Vec::new();
if !response.chunk_id.is_empty() {
sections.push(format!("Chunk ID: {}", response.chunk_id));
}
let wall_time_seconds = response.wall_time.as_secs_f64();
sections.push(format!("Wall time: {wall_time_seconds:.4} seconds"));
if let Some(exit_code) = response.exit_code {
sections.push(format!("Process exited with code {exit_code}"));
}
if let Some(process_id) = &response.process_id {
// Training still uses "session ID".
sections.push(format!("Process running with session ID {process_id}"));
}
if let Some(original_token_count) = response.original_token_count {
sections.push(format!("Original token count: {original_token_count}"));
}
sections.push("Output:".to_string());
sections.push(response.output.clone());
sections.join("\n")
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -26,7 +26,6 @@ use std::collections::HashSet;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Weak;
use std::time::Duration;
use codex_network_proxy::NetworkProxy;
use codex_protocol::models::PermissionProfile;
@ -108,20 +107,6 @@ pub(crate) struct WriteStdinRequest<'a> {
pub max_output_tokens: Option<usize>,
}
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct UnifiedExecResponse {
pub event_call_id: String,
pub chunk_id: String,
pub wall_time: Duration,
pub output: String,
/// Raw bytes returned for this unified exec call before any truncation.
pub raw_output: Vec<u8>,
pub process_id: Option<String>,
pub exit_code: Option<i32>,
pub original_token_count: Option<usize>,
pub session_command: Option<Vec<String>>,
}
#[derive(Default)]
pub(crate) struct ProcessStore {
processes: HashMap<String, ProcessEntry>,
@ -192,6 +177,7 @@ mod tests {
use crate::codex::make_session_and_context;
use crate::protocol::AskForApproval;
use crate::protocol::SandboxPolicy;
use crate::tools::context::ExecCommandToolOutput;
use crate::unified_exec::ExecCommandRequest;
use crate::unified_exec::WriteStdinRequest;
use core_test_support::skip_if_sandbox;
@ -218,7 +204,7 @@ mod tests {
turn: &Arc<TurnContext>,
cmd: &str,
yield_time_ms: u64,
) -> Result<UnifiedExecResponse, UnifiedExecError> {
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
let context =
UnifiedExecContext::new(Arc::clone(session), Arc::clone(turn), "call".to_string());
let process_id = session
@ -255,7 +241,7 @@ mod tests {
process_id: &str,
input: &str,
yield_time_ms: u64,
) -> Result<UnifiedExecResponse, UnifiedExecError> {
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
session
.services
.unified_exec_manager
@ -330,7 +316,7 @@ mod tests {
)
.await?;
assert!(
out_2.output.contains("codex"),
out_2.truncated_output().contains("codex"),
"expected environment variable output"
);
@ -366,7 +352,7 @@ mod tests {
"short command should not report a process id if it exits quickly"
);
assert!(
!out_2.output.contains("codex"),
!out_2.truncated_output().contains("codex"),
"short command should run in a fresh shell"
);
@ -382,7 +368,7 @@ mod tests {
)
.await?;
assert!(
out_3.output.contains("codex"),
out_3.truncated_output().contains("codex"),
"session should preserve state"
);
@ -420,7 +406,7 @@ mod tests {
)
.await?;
assert!(
!out_2.output.contains(TEST_VAR_VALUE),
!out_2.truncated_output().contains(TEST_VAR_VALUE),
"timeout too short should yield incomplete output"
);
@ -429,7 +415,7 @@ mod tests {
let out_3 = write_stdin(&session, process_id, "", 100).await?;
assert!(
out_3.output.contains(TEST_VAR_VALUE),
out_3.truncated_output().contains(TEST_VAR_VALUE),
"subsequent poll should retrieve output"
);
@ -444,7 +430,7 @@ mod tests {
let result = exec_command(&session, &turn, "echo codex", 120_000).await?;
assert!(result.process_id.is_some());
assert!(result.output.contains("codex"));
assert!(result.truncated_output().contains("codex"));
Ok(())
}
@ -459,7 +445,7 @@ mod tests {
result.process_id.is_some(),
"completed command should report a process id"
);
assert!(result.output.contains("codex"));
assert!(result.truncated_output().contains("codex"));
assert!(
session

View file

@ -16,6 +16,7 @@ use crate::exec_env::create_env;
use crate::exec_policy::ExecApprovalRequest;
use crate::protocol::ExecCommandSource;
use crate::sandboxing::ExecRequest;
use crate::tools::context::ExecCommandToolOutput;
use crate::tools::events::ToolEmitter;
use crate::tools::events::ToolEventCtx;
use crate::tools::events::ToolEventStage;
@ -25,9 +26,7 @@ use crate::tools::orchestrator::ToolOrchestrator;
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
use crate::tools::sandboxing::ToolCtx;
use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::formatted_truncate_text;
use crate::unified_exec::ExecCommandRequest;
use crate::unified_exec::MAX_UNIFIED_EXEC_PROCESSES;
use crate::unified_exec::MAX_YIELD_TIME_MS;
@ -38,7 +37,6 @@ use crate::unified_exec::ProcessStore;
use crate::unified_exec::UnifiedExecContext;
use crate::unified_exec::UnifiedExecError;
use crate::unified_exec::UnifiedExecProcessManager;
use crate::unified_exec::UnifiedExecResponse;
use crate::unified_exec::WARNING_UNIFIED_EXEC_PROCESSES;
use crate::unified_exec::WriteStdinRequest;
use crate::unified_exec::async_watcher::emit_exec_end_for_unified_exec;
@ -51,7 +49,6 @@ use crate::unified_exec::process::OutputBuffer;
use crate::unified_exec::process::OutputHandles;
use crate::unified_exec::process::SpawnLifecycleHandle;
use crate::unified_exec::process::UnifiedExecProcess;
use crate::unified_exec::resolve_max_tokens;
const UNIFIED_EXEC_ENV: [(&str, &str); 10] = [
("NO_COLOR", "1"),
@ -159,7 +156,7 @@ impl UnifiedExecProcessManager {
&self,
request: ExecCommandRequest,
context: &UnifiedExecContext,
) -> Result<UnifiedExecResponse, UnifiedExecError> {
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
let cwd = request
.workdir
.clone()
@ -194,7 +191,6 @@ impl UnifiedExecProcessManager {
emitter.emit(event_ctx, ToolEventStage::Begin).await;
start_streaming_output(&process, context, Arc::clone(&transcript));
let max_tokens = resolve_max_tokens(request.max_output_tokens);
let yield_time_ms = clamp_yield_time(request.yield_time_ms);
let start = Instant::now();
@ -221,7 +217,6 @@ impl UnifiedExecProcessManager {
let wall_time = Instant::now().saturating_duration_since(start);
let text = String::from_utf8_lossy(&collected).to_string();
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let exit_code = process.exit_code();
let has_exited = process.has_exited() || exit_code.is_some();
let chunk_id = generate_chunk_id();
@ -240,7 +235,7 @@ impl UnifiedExecProcessManager {
cwd.clone(),
Some(process_id),
Arc::clone(&transcript),
output.clone(),
text.clone(),
exit,
wall_time,
)
@ -276,12 +271,12 @@ impl UnifiedExecProcessManager {
};
let original_token_count = approx_token_count(&text);
let response = UnifiedExecResponse {
let response = ExecCommandToolOutput {
event_call_id: context.call_id.clone(),
chunk_id,
wall_time,
output,
raw_output: collected,
max_output_tokens: request.max_output_tokens,
process_id: if has_exited {
None
} else {
@ -298,7 +293,7 @@ impl UnifiedExecProcessManager {
pub(crate) async fn write_stdin(
&self,
request: WriteStdinRequest<'_>,
) -> Result<UnifiedExecResponse, UnifiedExecError> {
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
let process_id = request.process_id.to_string();
let PreparedProcessHandles {
@ -324,7 +319,6 @@ impl UnifiedExecProcessManager {
tokio::time::sleep(Duration::from_millis(100)).await;
}
let max_tokens = resolve_max_tokens(request.max_output_tokens);
let yield_time_ms = {
// Empty polls use configurable background timeout bounds. Non-empty
// writes keep a fixed max cap so interactive stdin remains responsive.
@ -349,7 +343,6 @@ impl UnifiedExecProcessManager {
let wall_time = Instant::now().saturating_duration_since(start);
let text = String::from_utf8_lossy(&collected).to_string();
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
let original_token_count = approx_token_count(&text);
let chunk_id = generate_chunk_id();
@ -375,12 +368,12 @@ impl UnifiedExecProcessManager {
}
};
let response = UnifiedExecResponse {
let response = ExecCommandToolOutput {
event_call_id,
chunk_id,
wall_time,
output,
raw_output: collected,
max_output_tokens: request.max_output_tokens,
process_id,
exit_code,
original_token_count: Some(original_token_count),