Move exec command truncation into ExecCommandToolOutput (#14169)
Summary - relocate truncation logic for exec command output into the new `ExecCommandToolOutput` response helper instead of centralized handler code - update all affected tools and unified exec handling to use the new response item structure and eliminate `Function(FunctionToolOutput)` responses - adjust context, registry, and handler interfaces to align with the new response semantics and error fields Testing - Not run (not requested)
This commit is contained in:
parent
0c33af7746
commit
a9ae43621b
4 changed files with 154 additions and 75 deletions
|
|
@ -3,7 +3,10 @@ use crate::codex::TurnContext;
|
|||
use crate::tools::TELEMETRY_PREVIEW_MAX_BYTES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_MAX_LINES;
|
||||
use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use crate::unified_exec::resolve_max_tokens;
|
||||
use codex_protocol::mcp::CallToolResult;
|
||||
use codex_protocol::models::FunctionCallOutputBody;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
|
|
@ -14,6 +17,7 @@ use codex_protocol::models::function_call_output_content_items_to_text;
|
|||
use codex_utils_string::take_bytes_at_char_boundary;
|
||||
use std::borrow::Cow;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use tokio::sync::Mutex;
|
||||
|
||||
pub type SharedTurnDiffTracker = Arc<Mutex<TurnDiffTracker>>;
|
||||
|
|
@ -116,6 +120,10 @@ impl FunctionToolOutput {
|
|||
success,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_text(self) -> String {
|
||||
function_call_output_content_items_to_text(&self.body).unwrap_or_default()
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolOutput for FunctionToolOutput {
|
||||
|
|
@ -135,6 +143,77 @@ impl ToolOutput for FunctionToolOutput {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ExecCommandToolOutput {
|
||||
pub event_call_id: String,
|
||||
pub chunk_id: String,
|
||||
pub wall_time: Duration,
|
||||
/// Raw bytes returned for this unified exec call before any truncation.
|
||||
pub raw_output: Vec<u8>,
|
||||
pub max_output_tokens: Option<usize>,
|
||||
pub process_id: Option<String>,
|
||||
pub exit_code: Option<i32>,
|
||||
pub original_token_count: Option<usize>,
|
||||
pub session_command: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
impl ToolOutput for ExecCommandToolOutput {
|
||||
fn log_preview(&self) -> String {
|
||||
telemetry_preview(&self.response_text())
|
||||
}
|
||||
|
||||
fn success_for_logging(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn into_response(self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem {
|
||||
function_tool_response(
|
||||
call_id,
|
||||
payload,
|
||||
vec![FunctionCallOutputContentItem::InputText {
|
||||
text: self.response_text(),
|
||||
}],
|
||||
Some(true),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl ExecCommandToolOutput {
|
||||
pub(crate) fn truncated_output(&self) -> String {
|
||||
let text = String::from_utf8_lossy(&self.raw_output).to_string();
|
||||
let max_tokens = resolve_max_tokens(self.max_output_tokens);
|
||||
formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens))
|
||||
}
|
||||
|
||||
fn response_text(&self) -> String {
|
||||
let mut sections = Vec::new();
|
||||
|
||||
if !self.chunk_id.is_empty() {
|
||||
sections.push(format!("Chunk ID: {}", self.chunk_id));
|
||||
}
|
||||
|
||||
let wall_time_seconds = self.wall_time.as_secs_f64();
|
||||
sections.push(format!("Wall time: {wall_time_seconds:.4} seconds"));
|
||||
|
||||
if let Some(exit_code) = self.exit_code {
|
||||
sections.push(format!("Process exited with code {exit_code}"));
|
||||
}
|
||||
|
||||
if let Some(process_id) = &self.process_id {
|
||||
sections.push(format!("Process running with session ID {process_id}"));
|
||||
}
|
||||
|
||||
if let Some(original_token_count) = self.original_token_count {
|
||||
sections.push(format!("Original token count: {original_token_count}"));
|
||||
}
|
||||
|
||||
sections.push("Output:".to_string());
|
||||
sections.push(self.truncated_output());
|
||||
|
||||
sections.join("\n")
|
||||
}
|
||||
}
|
||||
|
||||
fn function_tool_response(
|
||||
call_id: &str,
|
||||
payload: &ToolPayload,
|
||||
|
|
@ -204,6 +283,7 @@ fn telemetry_preview(content: &str) -> String {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use core_test_support::assert_regex_match;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
#[test]
|
||||
|
|
@ -336,4 +416,46 @@ mod tests {
|
|||
assert!(lines.len() <= TELEMETRY_PREVIEW_MAX_LINES + 1);
|
||||
assert_eq!(lines.last(), Some(&TELEMETRY_PREVIEW_TRUNCATION_NOTICE));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exec_command_tool_output_formats_truncated_response() {
|
||||
let payload = ToolPayload::Function {
|
||||
arguments: "{}".to_string(),
|
||||
};
|
||||
let response = ExecCommandToolOutput {
|
||||
event_call_id: "call-42".to_string(),
|
||||
chunk_id: "abc123".to_string(),
|
||||
wall_time: std::time::Duration::from_millis(1250),
|
||||
raw_output: b"token one token two token three token four token five".to_vec(),
|
||||
max_output_tokens: Some(4),
|
||||
process_id: None,
|
||||
exit_code: Some(0),
|
||||
original_token_count: Some(10),
|
||||
session_command: None,
|
||||
}
|
||||
.into_response("call-42", &payload);
|
||||
|
||||
match response {
|
||||
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
||||
assert_eq!(call_id, "call-42");
|
||||
assert_eq!(output.success, Some(true));
|
||||
let text = output
|
||||
.body
|
||||
.to_text()
|
||||
.expect("exec output should serialize as text");
|
||||
assert_regex_match(
|
||||
r#"(?sx)
|
||||
^Chunk\ ID:\ abc123
|
||||
\nWall\ time:\ \d+\.\d{4}\ seconds
|
||||
\nProcess\ exited\ with\ code\ 0
|
||||
\nOriginal\ token\ count:\ 10
|
||||
\nOutput:
|
||||
\n.*tokens\ truncated.*
|
||||
$"#,
|
||||
&text,
|
||||
);
|
||||
}
|
||||
other => panic!("expected FunctionCallOutput, got {other:?}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ use crate::sandboxing::SandboxPermissions;
|
|||
use crate::shell::Shell;
|
||||
use crate::shell::get_shell_by_model_provided_path;
|
||||
use crate::skills::maybe_emit_implicit_skill_invocation;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::ExecCommandToolOutput;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::handlers::apply_granted_turn_permissions;
|
||||
|
|
@ -21,7 +21,6 @@ use crate::tools::registry::ToolKind;
|
|||
use crate::unified_exec::ExecCommandRequest;
|
||||
use crate::unified_exec::UnifiedExecContext;
|
||||
use crate::unified_exec::UnifiedExecProcessManager;
|
||||
use crate::unified_exec::UnifiedExecResponse;
|
||||
use crate::unified_exec::WriteStdinRequest;
|
||||
use async_trait::async_trait;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
|
|
@ -82,7 +81,7 @@ fn default_tty() -> bool {
|
|||
|
||||
#[async_trait]
|
||||
impl ToolHandler for UnifiedExecHandler {
|
||||
type Output = FunctionToolOutput;
|
||||
type Output = ExecCommandToolOutput;
|
||||
|
||||
fn kind(&self) -> ToolKind {
|
||||
ToolKind::Function
|
||||
|
|
@ -230,7 +229,17 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
.await?
|
||||
{
|
||||
manager.release_process_id(&process_id).await;
|
||||
return Ok(output);
|
||||
return Ok(ExecCommandToolOutput {
|
||||
event_call_id: String::new(),
|
||||
chunk_id: String::new(),
|
||||
wall_time: std::time::Duration::ZERO,
|
||||
raw_output: output.into_text().into_bytes(),
|
||||
max_output_tokens: None,
|
||||
process_id: None,
|
||||
exit_code: None,
|
||||
original_token_count: None,
|
||||
session_command: None,
|
||||
});
|
||||
}
|
||||
|
||||
manager
|
||||
|
|
@ -290,9 +299,7 @@ impl ToolHandler for UnifiedExecHandler {
|
|||
}
|
||||
};
|
||||
|
||||
let content = format_response(&response);
|
||||
|
||||
Ok(FunctionToolOutput::from_text(content, Some(true)))
|
||||
Ok(response)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -321,35 +328,6 @@ pub(crate) fn get_command(
|
|||
Ok(shell.derive_exec_args(&args.cmd, use_login_shell))
|
||||
}
|
||||
|
||||
fn format_response(response: &UnifiedExecResponse) -> String {
|
||||
let mut sections = Vec::new();
|
||||
|
||||
if !response.chunk_id.is_empty() {
|
||||
sections.push(format!("Chunk ID: {}", response.chunk_id));
|
||||
}
|
||||
|
||||
let wall_time_seconds = response.wall_time.as_secs_f64();
|
||||
sections.push(format!("Wall time: {wall_time_seconds:.4} seconds"));
|
||||
|
||||
if let Some(exit_code) = response.exit_code {
|
||||
sections.push(format!("Process exited with code {exit_code}"));
|
||||
}
|
||||
|
||||
if let Some(process_id) = &response.process_id {
|
||||
// Training still uses "session ID".
|
||||
sections.push(format!("Process running with session ID {process_id}"));
|
||||
}
|
||||
|
||||
if let Some(original_token_count) = response.original_token_count {
|
||||
sections.push(format!("Original token count: {original_token_count}"));
|
||||
}
|
||||
|
||||
sections.push("Output:".to_string());
|
||||
sections.push(response.output.clone());
|
||||
|
||||
sections.join("\n")
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@ use std::collections::HashSet;
|
|||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Weak;
|
||||
use std::time::Duration;
|
||||
|
||||
use codex_network_proxy::NetworkProxy;
|
||||
use codex_protocol::models::PermissionProfile;
|
||||
|
|
@ -108,20 +107,6 @@ pub(crate) struct WriteStdinRequest<'a> {
|
|||
pub max_output_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub(crate) struct UnifiedExecResponse {
|
||||
pub event_call_id: String,
|
||||
pub chunk_id: String,
|
||||
pub wall_time: Duration,
|
||||
pub output: String,
|
||||
/// Raw bytes returned for this unified exec call before any truncation.
|
||||
pub raw_output: Vec<u8>,
|
||||
pub process_id: Option<String>,
|
||||
pub exit_code: Option<i32>,
|
||||
pub original_token_count: Option<usize>,
|
||||
pub session_command: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct ProcessStore {
|
||||
processes: HashMap<String, ProcessEntry>,
|
||||
|
|
@ -192,6 +177,7 @@ mod tests {
|
|||
use crate::codex::make_session_and_context;
|
||||
use crate::protocol::AskForApproval;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::tools::context::ExecCommandToolOutput;
|
||||
use crate::unified_exec::ExecCommandRequest;
|
||||
use crate::unified_exec::WriteStdinRequest;
|
||||
use core_test_support::skip_if_sandbox;
|
||||
|
|
@ -218,7 +204,7 @@ mod tests {
|
|||
turn: &Arc<TurnContext>,
|
||||
cmd: &str,
|
||||
yield_time_ms: u64,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
|
||||
let context =
|
||||
UnifiedExecContext::new(Arc::clone(session), Arc::clone(turn), "call".to_string());
|
||||
let process_id = session
|
||||
|
|
@ -255,7 +241,7 @@ mod tests {
|
|||
process_id: &str,
|
||||
input: &str,
|
||||
yield_time_ms: u64,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
|
||||
session
|
||||
.services
|
||||
.unified_exec_manager
|
||||
|
|
@ -330,7 +316,7 @@ mod tests {
|
|||
)
|
||||
.await?;
|
||||
assert!(
|
||||
out_2.output.contains("codex"),
|
||||
out_2.truncated_output().contains("codex"),
|
||||
"expected environment variable output"
|
||||
);
|
||||
|
||||
|
|
@ -366,7 +352,7 @@ mod tests {
|
|||
"short command should not report a process id if it exits quickly"
|
||||
);
|
||||
assert!(
|
||||
!out_2.output.contains("codex"),
|
||||
!out_2.truncated_output().contains("codex"),
|
||||
"short command should run in a fresh shell"
|
||||
);
|
||||
|
||||
|
|
@ -382,7 +368,7 @@ mod tests {
|
|||
)
|
||||
.await?;
|
||||
assert!(
|
||||
out_3.output.contains("codex"),
|
||||
out_3.truncated_output().contains("codex"),
|
||||
"session should preserve state"
|
||||
);
|
||||
|
||||
|
|
@ -420,7 +406,7 @@ mod tests {
|
|||
)
|
||||
.await?;
|
||||
assert!(
|
||||
!out_2.output.contains(TEST_VAR_VALUE),
|
||||
!out_2.truncated_output().contains(TEST_VAR_VALUE),
|
||||
"timeout too short should yield incomplete output"
|
||||
);
|
||||
|
||||
|
|
@ -429,7 +415,7 @@ mod tests {
|
|||
let out_3 = write_stdin(&session, process_id, "", 100).await?;
|
||||
|
||||
assert!(
|
||||
out_3.output.contains(TEST_VAR_VALUE),
|
||||
out_3.truncated_output().contains(TEST_VAR_VALUE),
|
||||
"subsequent poll should retrieve output"
|
||||
);
|
||||
|
||||
|
|
@ -444,7 +430,7 @@ mod tests {
|
|||
let result = exec_command(&session, &turn, "echo codex", 120_000).await?;
|
||||
|
||||
assert!(result.process_id.is_some());
|
||||
assert!(result.output.contains("codex"));
|
||||
assert!(result.truncated_output().contains("codex"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -459,7 +445,7 @@ mod tests {
|
|||
result.process_id.is_some(),
|
||||
"completed command should report a process id"
|
||||
);
|
||||
assert!(result.output.contains("codex"));
|
||||
assert!(result.truncated_output().contains("codex"));
|
||||
|
||||
assert!(
|
||||
session
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ use crate::exec_env::create_env;
|
|||
use crate::exec_policy::ExecApprovalRequest;
|
||||
use crate::protocol::ExecCommandSource;
|
||||
use crate::sandboxing::ExecRequest;
|
||||
use crate::tools::context::ExecCommandToolOutput;
|
||||
use crate::tools::events::ToolEmitter;
|
||||
use crate::tools::events::ToolEventCtx;
|
||||
use crate::tools::events::ToolEventStage;
|
||||
|
|
@ -25,9 +26,7 @@ use crate::tools::orchestrator::ToolOrchestrator;
|
|||
use crate::tools::runtimes::unified_exec::UnifiedExecRequest as UnifiedExecToolRequest;
|
||||
use crate::tools::runtimes::unified_exec::UnifiedExecRuntime;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
use crate::truncate::TruncationPolicy;
|
||||
use crate::truncate::approx_token_count;
|
||||
use crate::truncate::formatted_truncate_text;
|
||||
use crate::unified_exec::ExecCommandRequest;
|
||||
use crate::unified_exec::MAX_UNIFIED_EXEC_PROCESSES;
|
||||
use crate::unified_exec::MAX_YIELD_TIME_MS;
|
||||
|
|
@ -38,7 +37,6 @@ use crate::unified_exec::ProcessStore;
|
|||
use crate::unified_exec::UnifiedExecContext;
|
||||
use crate::unified_exec::UnifiedExecError;
|
||||
use crate::unified_exec::UnifiedExecProcessManager;
|
||||
use crate::unified_exec::UnifiedExecResponse;
|
||||
use crate::unified_exec::WARNING_UNIFIED_EXEC_PROCESSES;
|
||||
use crate::unified_exec::WriteStdinRequest;
|
||||
use crate::unified_exec::async_watcher::emit_exec_end_for_unified_exec;
|
||||
|
|
@ -51,7 +49,6 @@ use crate::unified_exec::process::OutputBuffer;
|
|||
use crate::unified_exec::process::OutputHandles;
|
||||
use crate::unified_exec::process::SpawnLifecycleHandle;
|
||||
use crate::unified_exec::process::UnifiedExecProcess;
|
||||
use crate::unified_exec::resolve_max_tokens;
|
||||
|
||||
const UNIFIED_EXEC_ENV: [(&str, &str); 10] = [
|
||||
("NO_COLOR", "1"),
|
||||
|
|
@ -159,7 +156,7 @@ impl UnifiedExecProcessManager {
|
|||
&self,
|
||||
request: ExecCommandRequest,
|
||||
context: &UnifiedExecContext,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
|
||||
let cwd = request
|
||||
.workdir
|
||||
.clone()
|
||||
|
|
@ -194,7 +191,6 @@ impl UnifiedExecProcessManager {
|
|||
emitter.emit(event_ctx, ToolEventStage::Begin).await;
|
||||
|
||||
start_streaming_output(&process, context, Arc::clone(&transcript));
|
||||
let max_tokens = resolve_max_tokens(request.max_output_tokens);
|
||||
let yield_time_ms = clamp_yield_time(request.yield_time_ms);
|
||||
|
||||
let start = Instant::now();
|
||||
|
|
@ -221,7 +217,6 @@ impl UnifiedExecProcessManager {
|
|||
let wall_time = Instant::now().saturating_duration_since(start);
|
||||
|
||||
let text = String::from_utf8_lossy(&collected).to_string();
|
||||
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
|
||||
let exit_code = process.exit_code();
|
||||
let has_exited = process.has_exited() || exit_code.is_some();
|
||||
let chunk_id = generate_chunk_id();
|
||||
|
|
@ -240,7 +235,7 @@ impl UnifiedExecProcessManager {
|
|||
cwd.clone(),
|
||||
Some(process_id),
|
||||
Arc::clone(&transcript),
|
||||
output.clone(),
|
||||
text.clone(),
|
||||
exit,
|
||||
wall_time,
|
||||
)
|
||||
|
|
@ -276,12 +271,12 @@ impl UnifiedExecProcessManager {
|
|||
};
|
||||
|
||||
let original_token_count = approx_token_count(&text);
|
||||
let response = UnifiedExecResponse {
|
||||
let response = ExecCommandToolOutput {
|
||||
event_call_id: context.call_id.clone(),
|
||||
chunk_id,
|
||||
wall_time,
|
||||
output,
|
||||
raw_output: collected,
|
||||
max_output_tokens: request.max_output_tokens,
|
||||
process_id: if has_exited {
|
||||
None
|
||||
} else {
|
||||
|
|
@ -298,7 +293,7 @@ impl UnifiedExecProcessManager {
|
|||
pub(crate) async fn write_stdin(
|
||||
&self,
|
||||
request: WriteStdinRequest<'_>,
|
||||
) -> Result<UnifiedExecResponse, UnifiedExecError> {
|
||||
) -> Result<ExecCommandToolOutput, UnifiedExecError> {
|
||||
let process_id = request.process_id.to_string();
|
||||
|
||||
let PreparedProcessHandles {
|
||||
|
|
@ -324,7 +319,6 @@ impl UnifiedExecProcessManager {
|
|||
tokio::time::sleep(Duration::from_millis(100)).await;
|
||||
}
|
||||
|
||||
let max_tokens = resolve_max_tokens(request.max_output_tokens);
|
||||
let yield_time_ms = {
|
||||
// Empty polls use configurable background timeout bounds. Non-empty
|
||||
// writes keep a fixed max cap so interactive stdin remains responsive.
|
||||
|
|
@ -349,7 +343,6 @@ impl UnifiedExecProcessManager {
|
|||
let wall_time = Instant::now().saturating_duration_since(start);
|
||||
|
||||
let text = String::from_utf8_lossy(&collected).to_string();
|
||||
let output = formatted_truncate_text(&text, TruncationPolicy::Tokens(max_tokens));
|
||||
let original_token_count = approx_token_count(&text);
|
||||
let chunk_id = generate_chunk_id();
|
||||
|
||||
|
|
@ -375,12 +368,12 @@ impl UnifiedExecProcessManager {
|
|||
}
|
||||
};
|
||||
|
||||
let response = UnifiedExecResponse {
|
||||
let response = ExecCommandToolOutput {
|
||||
event_call_id,
|
||||
chunk_id,
|
||||
wall_time,
|
||||
output,
|
||||
raw_output: collected,
|
||||
max_output_tokens: request.max_output_tokens,
|
||||
process_id,
|
||||
exit_code,
|
||||
original_token_count: Some(original_token_count),
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue