Inline response recording and remove process_items indirection (#7310)

- Inline response recording during streaming: `run_turn` now records
items as they arrive instead of building a `ProcessedResponseItem` list
and post‑processing via `process_items`.
- Simplify turn handling: `handle_output_item_done` returns the
follow‑up signal + optional tool future; `needs_follow_up` is set only
there, and in‑flight tool futures are drained once at the end (errors
logged, no extra state writes).
- Flattened stream loop: removed `process_items` indirection and the
extra output queue
- - Tests: relaxed `tool_parallelism::tool_results_grouped` to allow any
completion order while still requiring matching call/output IDs.
This commit is contained in:
Ahmed Ibrahim 2025-12-04 12:17:54 -08:00 committed by GitHub
parent 7dfc3a4dc7
commit 6e6338aa87
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 288 additions and 212 deletions

View file

@ -13,11 +13,12 @@ use crate::compact::should_use_remote_compact_task;
use crate::compact_remote::run_inline_remote_auto_compact_task;
use crate::features::Feature;
use crate::features::Features;
use crate::function_tool::FunctionCallError;
use crate::openai_models::models_manager::ModelsManager;
use crate::parse_command::parse_command;
use crate::parse_turn_item;
use crate::response_processing::process_items;
use crate::stream_events_utils::HandleOutputCtx;
use crate::stream_events_utils::handle_non_tool_response_item;
use crate::stream_events_utils::handle_output_item_done;
use crate::terminal;
use crate::truncate::TruncationPolicy;
use crate::user_notification::UserNotifier;
@ -131,7 +132,6 @@ use codex_execpolicy::Policy as ExecPolicy;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::models::ContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
@ -827,7 +827,7 @@ impl Session {
}
}
async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) {
pub(crate) async fn emit_turn_item_started(&self, turn_context: &TurnContext, item: &TurnItem) {
self.send_event(
turn_context,
EventMsg::ItemStarted(ItemStartedEvent {
@ -839,7 +839,11 @@ impl Session {
.await;
}
async fn emit_turn_item_completed(&self, turn_context: &TurnContext, item: TurnItem) {
pub(crate) async fn emit_turn_item_completed(
&self,
turn_context: &TurnContext,
item: TurnItem,
) {
self.send_event(
turn_context,
EventMsg::ItemCompleted(ItemCompletedEvent {
@ -2060,15 +2064,16 @@ pub(crate) async fn run_task(
.await
{
Ok(turn_output) => {
let processed_items = turn_output;
let TurnRunResult {
needs_follow_up,
last_agent_message: turn_last_agent_message,
} = turn_output;
let limit = turn_context
.client
.get_auto_compact_token_limit()
.unwrap_or(i64::MAX);
let total_usage_tokens = sess.get_total_token_usage().await;
let token_limit_reached = total_usage_tokens >= limit;
let (responses, items_to_record_in_conversation_history) =
process_items(processed_items, &sess, &turn_context).await;
// as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
if token_limit_reached {
@ -2081,10 +2086,8 @@ pub(crate) async fn run_task(
continue;
}
if responses.is_empty() {
last_agent_message = get_last_assistant_message_from_turn(
&items_to_record_in_conversation_history,
);
if !needs_follow_up {
last_agent_message = turn_last_agent_message;
sess.notifier()
.notify(&UserNotification::AgentTurnComplete {
thread_id: sess.conversation_id.to_string(),
@ -2097,10 +2100,7 @@ pub(crate) async fn run_task(
}
continue;
}
Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
}) => {
let _ = process_items(processed_items, &sess, &turn_context).await;
Err(CodexErr::TurnAborted) => {
// Aborted turn is reported via a different event.
break;
}
@ -2130,7 +2130,7 @@ async fn run_turn(
turn_diff_tracker: SharedTurnDiffTracker,
input: Vec<ResponseItem>,
cancellation_token: CancellationToken,
) -> CodexResult<Vec<ProcessedResponseItem>> {
) -> CodexResult<TurnRunResult> {
let mcp_tools = sess
.services
.mcp_connection_manager
@ -2184,13 +2184,10 @@ async fn run_turn(
)
.await
{
// todo(aibrahim): map special cases and ? on other errors
Ok(output) => return Ok(output),
Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
}) => {
return Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
});
Err(CodexErr::TurnAborted) => {
return Err(CodexErr::TurnAborted);
}
Err(CodexErr::Interrupted) => return Err(CodexErr::Interrupted),
Err(CodexErr::EnvVar(var)) => return Err(CodexErr::EnvVar(var)),
@ -2243,14 +2240,29 @@ async fn run_turn(
}
}
/// When the model is prompted, it returns a stream of events. Some of these
/// events map to a `ResponseItem`. A `ResponseItem` may need to be
/// "handled" such that it produces a `ResponseInputItem` that needs to be
/// sent back to the model on the next turn.
#[derive(Debug)]
pub struct ProcessedResponseItem {
pub item: ResponseItem,
pub response: Option<ResponseInputItem>,
struct TurnRunResult {
needs_follow_up: bool,
last_agent_message: Option<String>,
}
async fn drain_in_flight(
in_flight: &mut FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>>,
sess: Arc<Session>,
turn_context: Arc<TurnContext>,
) -> CodexResult<()> {
while let Some(res) = in_flight.next().await {
match res {
Ok(response_input) => {
sess.record_conversation_items(&turn_context, &[response_input.into()])
.await;
}
Err(err) => {
error_or_panic(format!("in-flight tool future failed during drain: {err}"));
}
}
}
Ok(())
}
#[allow(clippy::too_many_arguments)]
@ -2261,7 +2273,7 @@ async fn try_run_turn(
turn_diff_tracker: SharedTurnDiffTracker,
prompt: &Prompt,
cancellation_token: CancellationToken,
) -> CodexResult<Vec<ProcessedResponseItem>> {
) -> CodexResult<TurnRunResult> {
let rollout_item = RolloutItem::TurnContext(TurnContextItem {
cwd: turn_context.cwd.clone(),
approval_policy: turn_context.approval_policy,
@ -2285,114 +2297,47 @@ async fn try_run_turn(
Arc::clone(&turn_context),
Arc::clone(&turn_diff_tracker),
);
let mut output: FuturesOrdered<BoxFuture<CodexResult<ProcessedResponseItem>>> =
let mut in_flight: FuturesOrdered<BoxFuture<'static, CodexResult<ResponseInputItem>>> =
FuturesOrdered::new();
let mut needs_follow_up = false;
let mut last_agent_message: Option<String> = None;
let mut active_item: Option<TurnItem> = None;
loop {
// Poll the next item from the model stream. We must inspect *both* Ok and Err
// cases so that transient stream failures (e.g., dropped SSE connection before
// `response.completed`) bubble up and trigger the caller's retry logic.
let outcome: CodexResult<TurnRunResult> = loop {
let event = match stream.next().or_cancel(&cancellation_token).await {
Ok(event) => event,
Err(codex_async_utils::CancelErr::Cancelled) => {
let processed_items = output.try_collect().await?;
return Err(CodexErr::TurnAborted {
dangling_artifacts: processed_items,
});
}
Err(codex_async_utils::CancelErr::Cancelled) => break Err(CodexErr::TurnAborted),
};
let event = match event {
Some(res) => res?,
None => {
return Err(CodexErr::Stream(
break Err(CodexErr::Stream(
"stream closed before response.completed".into(),
None,
));
}
};
let add_completed = &mut |response_item: ProcessedResponseItem| {
output.push_back(future::ready(Ok(response_item)).boxed());
};
match event {
ResponseEvent::Created => {}
ResponseEvent::OutputItemDone(item) => {
let previously_active_item = active_item.take();
match ToolRouter::build_tool_call(sess.as_ref(), item.clone()).await {
Ok(Some(call)) => {
let payload_preview = call.payload.log_payload().into_owned();
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
let mut ctx = HandleOutputCtx {
sess: sess.clone(),
turn_context: turn_context.clone(),
tool_runtime: tool_runtime.clone(),
cancellation_token: cancellation_token.child_token(),
};
let response =
tool_runtime.handle_tool_call(call, cancellation_token.child_token());
output.push_back(
async move {
Ok(ProcessedResponseItem {
item,
response: Some(response.await?),
})
}
.boxed(),
);
}
Ok(None) => {
if let Some(turn_item) = handle_non_tool_response_item(&item).await {
if previously_active_item.is_none() {
sess.emit_turn_item_started(&turn_context, &turn_item).await;
}
sess.emit_turn_item_completed(&turn_context, turn_item)
.await;
}
add_completed(ProcessedResponseItem {
item,
response: None,
});
}
Err(FunctionCallError::MissingLocalShellCallId) => {
let msg = "LocalShellCall without call_id or id";
turn_context
.client
.get_otel_event_manager()
.log_tool_failed("local_shell", msg);
error!(msg);
let response = ResponseInputItem::FunctionCallOutput {
call_id: String::new(),
output: FunctionCallOutputPayload {
content: msg.to_string(),
..Default::default()
},
};
add_completed(ProcessedResponseItem {
item,
response: Some(response),
});
}
Err(FunctionCallError::RespondToModel(message))
| Err(FunctionCallError::Denied(message)) => {
let response = ResponseInputItem::FunctionCallOutput {
call_id: String::new(),
output: FunctionCallOutputPayload {
content: message,
..Default::default()
},
};
add_completed(ProcessedResponseItem {
item,
response: Some(response),
});
}
Err(FunctionCallError::Fatal(message)) => {
return Err(CodexErr::Fatal(message));
}
let output_result =
handle_output_item_done(&mut ctx, item, previously_active_item).await?;
if let Some(tool_future) = output_result.tool_future {
in_flight.push_back(tool_future);
}
if let Some(agent_message) = output_result.last_agent_message {
last_agent_message = Some(agent_message);
}
needs_follow_up |= output_result.needs_follow_up;
}
ResponseEvent::OutputItemAdded(item) => {
if let Some(turn_item) = handle_non_tool_response_item(&item).await {
@ -2413,7 +2358,6 @@ async fn try_run_turn(
} => {
sess.update_token_usage_info(&turn_context, token_usage.as_ref())
.await;
let processed_items = output.try_collect().await?;
let unified_diff = {
let mut tracker = turn_diff_tracker.lock().await;
tracker.get_unified_diff()
@ -2423,7 +2367,10 @@ async fn try_run_turn(
sess.send_event(&turn_context, msg).await;
}
return Ok(processed_items);
break Ok(TurnRunResult {
needs_follow_up,
last_agent_message,
});
}
ResponseEvent::OutputTextDelta(delta) => {
// In review child threads, suppress assistant text deltas; the
@ -2490,22 +2437,11 @@ async fn try_run_turn(
}
}
}
}
}
};
async fn handle_non_tool_response_item(item: &ResponseItem) -> Option<TurnItem> {
debug!(?item, "Output item");
drain_in_flight(&mut in_flight, sess, turn_context).await?;
match item {
ResponseItem::Message { .. }
| ResponseItem::Reasoning { .. }
| ResponseItem::WebSearchCall { .. } => parse_turn_item(item),
ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => {
debug!("unexpected tool output from stream");
None
}
_ => None,
}
outcome
}
pub(super) fn get_last_assistant_message_from_turn(responses: &[ResponseItem]) -> Option<String> {
@ -2540,8 +2476,10 @@ mod tests {
use crate::config::ConfigOverrides;
use crate::config::ConfigToml;
use crate::exec::ExecToolCallOutput;
use crate::function_tool::FunctionCallError;
use crate::shell::default_user_shell;
use crate::tools::format_exec_output_str;
use codex_protocol::models::FunctionCallOutputPayload;
use crate::protocol::CompactedItem;
use crate::protocol::CreditsSnapshot;

View file

@ -1,4 +1,3 @@
use crate::codex::ProcessedResponseItem;
use crate::exec::ExecToolCallOutput;
use crate::token_data::KnownPlan;
use crate::token_data::PlanType;
@ -61,9 +60,7 @@ pub enum SandboxErr {
pub enum CodexErr {
// todo(aibrahim): git rid of this error carrying the dangling artifacts
#[error("turn aborted. Something went wrong? Hit `/feedback` to report the issue.")]
TurnAborted {
dangling_artifacts: Vec<ProcessedResponseItem>,
},
TurnAborted,
/// Returned by ResponsesClient when the SSE stream disconnects or errors out **after** the HTTP
/// handshake has succeeded but **before** it finished emitting `response.completed`.
@ -181,9 +178,7 @@ pub enum CodexErr {
impl From<CancelErr> for CodexErr {
fn from(_: CancelErr) -> Self {
CodexErr::TurnAborted {
dangling_artifacts: Vec::new(),
}
CodexErr::TurnAborted
}
}

View file

@ -41,8 +41,8 @@ mod message_history;
mod model_provider_info;
pub mod parse_command;
pub mod powershell;
mod response_processing;
pub mod sandboxing;
mod stream_events_utils;
mod text_encoding;
pub mod token_data;
mod truncate;

View file

@ -1,70 +0,0 @@
use crate::codex::Session;
use crate::codex::TurnContext;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use tracing::warn;
/// Process streamed `ResponseItem`s from the model into the pair of:
/// - items we should record in conversation history; and
/// - `ResponseInputItem`s to send back to the model on the next turn.
pub(crate) async fn process_items(
processed_items: Vec<crate::codex::ProcessedResponseItem>,
sess: &Session,
turn_context: &TurnContext,
) -> (Vec<ResponseInputItem>, Vec<ResponseItem>) {
let mut outputs_to_record = Vec::<ResponseItem>::new();
let mut new_inputs_to_record = Vec::<ResponseItem>::new();
let mut responses = Vec::<ResponseInputItem>::new();
for processed_response_item in processed_items {
let crate::codex::ProcessedResponseItem { item, response } = processed_response_item;
if let Some(response) = &response {
responses.push(response.clone());
}
match response {
Some(ResponseInputItem::FunctionCallOutput { call_id, output }) => {
new_inputs_to_record.push(ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: output.clone(),
});
}
Some(ResponseInputItem::CustomToolCallOutput { call_id, output }) => {
new_inputs_to_record.push(ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: output.clone(),
});
}
Some(ResponseInputItem::McpToolCallOutput { call_id, result }) => {
let output = match result {
Ok(call_tool_result) => FunctionCallOutputPayload::from(&call_tool_result),
Err(err) => FunctionCallOutputPayload {
content: err.clone(),
success: Some(false),
..Default::default()
},
};
new_inputs_to_record.push(ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output,
});
}
None => {}
_ => {
warn!("Unexpected response item: {item:?} with response: {response:?}");
}
};
outputs_to_record.push(item);
}
let all_items_to_record = [outputs_to_record, new_inputs_to_record].concat();
// Only attempt to take the lock if there is something to record.
if !all_items_to_record.is_empty() {
sess.record_conversation_items(turn_context, &all_items_to_record)
.await;
}
(responses, all_items_to_record)
}

View file

@ -0,0 +1,212 @@
use std::pin::Pin;
use std::sync::Arc;
use codex_protocol::items::TurnItem;
use tokio_util::sync::CancellationToken;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::error::CodexErr;
use crate::error::Result;
use crate::function_tool::FunctionCallError;
use crate::parse_turn_item;
use crate::tools::parallel::ToolCallRuntime;
use crate::tools::router::ToolRouter;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use futures::Future;
use tracing::debug;
/// Handle a completed output item from the model stream, recording it and
/// queuing any tool execution futures. This records items immediately so
/// history and rollout stay in sync even if the turn is later cancelled.
pub(crate) type InFlightFuture<'f> =
Pin<Box<dyn Future<Output = Result<ResponseInputItem>> + Send + 'f>>;
#[derive(Default)]
pub(crate) struct OutputItemResult {
pub last_agent_message: Option<String>,
pub needs_follow_up: bool,
pub tool_future: Option<InFlightFuture<'static>>,
}
pub(crate) struct HandleOutputCtx {
pub sess: Arc<Session>,
pub turn_context: Arc<TurnContext>,
pub tool_runtime: ToolCallRuntime,
pub cancellation_token: CancellationToken,
}
pub(crate) async fn handle_output_item_done(
ctx: &mut HandleOutputCtx,
item: ResponseItem,
previously_active_item: Option<TurnItem>,
) -> Result<OutputItemResult> {
let mut output = OutputItemResult::default();
match ToolRouter::build_tool_call(ctx.sess.as_ref(), item.clone()).await {
// The model emitted a tool call; log it, persist the item immediately, and queue the tool execution.
Ok(Some(call)) => {
let payload_preview = call.payload.log_payload().into_owned();
tracing::info!("ToolCall: {} {}", call.tool_name, payload_preview);
ctx.sess
.record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item))
.await;
let cancellation_token = ctx.cancellation_token.child_token();
let tool_runtime = ctx.tool_runtime.clone();
let tool_future: InFlightFuture<'static> = Box::pin(async move {
let response_input = tool_runtime
.handle_tool_call(call, cancellation_token)
.await?;
Ok(response_input)
});
output.needs_follow_up = true;
output.tool_future = Some(tool_future);
}
// No tool call: convert messages/reasoning into turn items and mark them as complete.
Ok(None) => {
if let Some(turn_item) = handle_non_tool_response_item(&item).await {
if previously_active_item.is_none() {
ctx.sess
.emit_turn_item_started(&ctx.turn_context, &turn_item)
.await;
}
ctx.sess
.emit_turn_item_completed(&ctx.turn_context, turn_item)
.await;
}
ctx.sess
.record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item))
.await;
let last_agent_message = last_assistant_message_from_item(&item);
output.last_agent_message = last_agent_message;
}
// Guardrail: the model issued a LocalShellCall without an id; surface the error back into history.
Err(FunctionCallError::MissingLocalShellCallId) => {
let msg = "LocalShellCall without call_id or id";
ctx.turn_context
.client
.get_otel_event_manager()
.log_tool_failed("local_shell", msg);
tracing::error!(msg);
let response = ResponseInputItem::FunctionCallOutput {
call_id: String::new(),
output: FunctionCallOutputPayload {
content: msg.to_string(),
..Default::default()
},
};
ctx.sess
.record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item))
.await;
if let Some(response_item) = response_input_to_response_item(&response) {
ctx.sess
.record_conversation_items(
&ctx.turn_context,
std::slice::from_ref(&response_item),
)
.await;
}
output.needs_follow_up = true;
}
// The tool request should be answered directly (or was denied); push that response into the transcript.
Err(FunctionCallError::RespondToModel(message))
| Err(FunctionCallError::Denied(message)) => {
let response = ResponseInputItem::FunctionCallOutput {
call_id: String::new(),
output: FunctionCallOutputPayload {
content: message,
..Default::default()
},
};
ctx.sess
.record_conversation_items(&ctx.turn_context, std::slice::from_ref(&item))
.await;
if let Some(response_item) = response_input_to_response_item(&response) {
ctx.sess
.record_conversation_items(
&ctx.turn_context,
std::slice::from_ref(&response_item),
)
.await;
}
output.needs_follow_up = true;
}
// A fatal error occurred; surface it back into history.
Err(FunctionCallError::Fatal(message)) => {
return Err(CodexErr::Fatal(message));
}
}
Ok(output)
}
pub(crate) async fn handle_non_tool_response_item(item: &ResponseItem) -> Option<TurnItem> {
debug!(?item, "Output item");
match item {
ResponseItem::Message { .. }
| ResponseItem::Reasoning { .. }
| ResponseItem::WebSearchCall { .. } => parse_turn_item(item),
ResponseItem::FunctionCallOutput { .. } | ResponseItem::CustomToolCallOutput { .. } => {
debug!("unexpected tool output from stream");
None
}
_ => None,
}
}
pub(crate) fn last_assistant_message_from_item(item: &ResponseItem) -> Option<String> {
if let ResponseItem::Message { role, content, .. } = item
&& role == "assistant"
{
return content.iter().rev().find_map(|ci| match ci {
codex_protocol::models::ContentItem::OutputText { text } => Some(text.clone()),
_ => None,
});
}
None
}
pub(crate) fn response_input_to_response_item(input: &ResponseInputItem) -> Option<ResponseItem> {
match input {
ResponseInputItem::FunctionCallOutput { call_id, output } => {
Some(ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output: output.clone(),
})
}
ResponseInputItem::CustomToolCallOutput { call_id, output } => {
Some(ResponseItem::CustomToolCallOutput {
call_id: call_id.clone(),
output: output.clone(),
})
}
ResponseInputItem::McpToolCallOutput { call_id, result } => {
let output = match result {
Ok(call_tool_result) => FunctionCallOutputPayload::from(call_tool_result),
Err(err) => FunctionCallOutputPayload {
content: err.clone(),
success: Some(false),
..Default::default()
},
};
Some(ResponseItem::FunctionCallOutput {
call_id: call_id.clone(),
output,
})
}
_ => None,
}
}

View file

@ -17,6 +17,7 @@ use crate::tools::router::ToolRouter;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::ResponseInputItem;
#[derive(Clone)]
pub(crate) struct ToolCallRuntime {
router: Arc<ToolRouter>,
session: Arc<Session>,