Handle orphan exec ends without clobbering active exploring cell (#12313)

Summary
- distinguish exec end handling targets (active tracking, active orphan
history, new cell) so unified exec responses don’t clobber unrelated
exploring cells
- ensure orphan ends flush existing exploring history when complete,
insert standalone history entries, and keep active cells correct
- add regression tests plus a snapshot covering the new behavior and
expose the ExecCell completion result for verification

Fix for https://github.com/openai/codex/issues/12278

---------

Co-authored-by: Josh McKinney <joshka@openai.com>
This commit is contained in:
jif-oai 2026-02-22 14:26:58 +00:00 committed by GitHub
parent 4666a6e631
commit 0a0caa9df2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 266 additions and 45 deletions

View file

@ -2357,7 +2357,25 @@ impl ChatWidget {
elapsed
}
/// Finalizes an exec call while preserving the active exec cell grouping contract.
///
/// Exec begin/end events usually pair through `running_commands`, but unified exec can emit an
/// end event for a call that was never materialized as the current active `ExecCell` (for
/// example, when another exploring group is still active). In that case we render the end as a
/// standalone history entry instead of replacing or flushing the unrelated active exploring
/// cell. If this method treated every unknown end as "complete the active cell", the UI could
/// merge unrelated commands and hide still-running exploring work.
pub(crate) fn handle_exec_end_now(&mut self, ev: ExecCommandEndEvent) {
enum ExecEndTarget {
// Normal case: the active exec cell already tracks this call id.
ActiveTracked,
// We have an active exec group, but it does not contain this call id. Render the end
// as a standalone finalized history cell so the active group remains intact.
OrphanHistoryWhileActiveExec,
// No active exec cell can safely own this end; build a new cell from the end payload.
NewCell,
}
let running = self.running_commands.remove(&ev.call_id);
if self.suppressed_exec_calls.remove(&ev.call_id) {
return;
@ -2368,49 +2386,96 @@ impl ChatWidget {
};
let is_unified_exec_interaction =
matches!(source, ExecCommandSource::UnifiedExecInteraction);
let needs_new = self
.active_cell
.as_ref()
.map(|cell| cell.as_any().downcast_ref::<ExecCell>().is_none())
.unwrap_or(true);
if needs_new {
self.flush_active_cell();
self.active_cell = Some(Box::new(new_active_exec_command(
ev.call_id.clone(),
command,
parsed,
source,
ev.interaction_input.clone(),
self.config.animations,
)));
}
if let Some(cell) = self
.active_cell
.as_mut()
.and_then(|c| c.as_any_mut().downcast_mut::<ExecCell>())
{
let output = if is_unified_exec_interaction {
CommandOutput {
exit_code: ev.exit_code,
formatted_output: String::new(),
aggregated_output: String::new(),
let end_target = match self.active_cell.as_ref() {
Some(cell) => match cell.as_any().downcast_ref::<ExecCell>() {
Some(exec_cell)
if exec_cell
.iter_calls()
.any(|call| call.call_id == ev.call_id) =>
{
ExecEndTarget::ActiveTracked
}
} else {
CommandOutput {
exit_code: ev.exit_code,
formatted_output: ev.formatted_output.clone(),
aggregated_output: ev.aggregated_output.clone(),
Some(exec_cell) if exec_cell.is_active() => {
ExecEndTarget::OrphanHistoryWhileActiveExec
}
};
cell.complete_call(&ev.call_id, output, ev.duration);
if cell.should_flush() {
self.flush_active_cell();
} else {
self.bump_active_cell_revision();
Some(_) | None => ExecEndTarget::NewCell,
},
None => ExecEndTarget::NewCell,
};
// Unified exec interaction rows intentionally hide command output text in the exec cell and
// instead render the interaction-specific content elsewhere in the UI.
let output = if is_unified_exec_interaction {
CommandOutput {
exit_code: ev.exit_code,
formatted_output: String::new(),
aggregated_output: String::new(),
}
} else {
CommandOutput {
exit_code: ev.exit_code,
formatted_output: ev.formatted_output.clone(),
aggregated_output: ev.aggregated_output.clone(),
}
};
match end_target {
ExecEndTarget::ActiveTracked => {
if let Some(cell) = self
.active_cell
.as_mut()
.and_then(|c| c.as_any_mut().downcast_mut::<ExecCell>())
{
let completed = cell.complete_call(&ev.call_id, output, ev.duration);
debug_assert!(completed, "active exec cell should contain {}", ev.call_id);
if cell.should_flush() {
self.flush_active_cell();
} else {
self.bump_active_cell_revision();
self.request_redraw();
}
}
}
ExecEndTarget::OrphanHistoryWhileActiveExec => {
let mut orphan = new_active_exec_command(
ev.call_id.clone(),
command,
parsed,
source,
ev.interaction_input.clone(),
self.config.animations,
);
let completed = orphan.complete_call(&ev.call_id, output, ev.duration);
debug_assert!(
completed,
"new orphan exec cell should contain {}",
ev.call_id
);
self.needs_final_message_separator = true;
self.app_event_tx
.send(AppEvent::InsertHistoryCell(Box::new(orphan)));
self.request_redraw();
}
ExecEndTarget::NewCell => {
self.flush_active_cell();
let mut cell = new_active_exec_command(
ev.call_id.clone(),
command,
parsed,
source,
ev.interaction_input.clone(),
self.config.animations,
);
let completed = cell.complete_call(&ev.call_id, output, ev.duration);
debug_assert!(completed, "new exec cell should contain {}", ev.call_id);
if cell.should_flush() {
self.add_to_history(cell);
} else {
self.active_cell = Some(Box::new(cell));
self.bump_active_cell_revision();
self.request_redraw();
}
}
}
// Mark that actual work was done (command executed)
self.had_work_activity = true;

View file

@ -0,0 +1,11 @@
---
source: tui/src/chatwidget/tests.rs
expression: snapshot
---
History:
• Ran echo repro-marker
└ repro-marker
Active:
• Exploring
└ Read null

View file

@ -3531,6 +3531,114 @@ async fn exec_end_without_begin_uses_event_command() {
);
}
#[tokio::test]
async fn exec_end_without_begin_does_not_flush_unrelated_running_exploring_cell() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
chat.on_task_started();
begin_exec(&mut chat, "call-exploring", "cat /dev/null");
assert!(drain_insert_history(&mut rx).is_empty());
assert!(active_blob(&chat).contains("Read null"));
let orphan =
begin_unified_exec_startup(&mut chat, "call-orphan", "proc-1", "echo repro-marker");
assert!(drain_insert_history(&mut rx).is_empty());
end_exec(&mut chat, orphan, "repro-marker\n", "", 0);
let cells = drain_insert_history(&mut rx);
assert_eq!(cells.len(), 1, "only the orphan end should be inserted");
let orphan_blob = lines_to_single_string(&cells[0]);
assert!(
orphan_blob.contains("• Ran echo repro-marker"),
"expected orphan end to render a standalone entry: {orphan_blob:?}"
);
let active = active_blob(&chat);
assert!(
active.contains("• Exploring"),
"expected unrelated exploring call to remain active: {active:?}"
);
assert!(
active.contains("Read null"),
"expected active exploring command to remain visible: {active:?}"
);
assert!(
!active.contains("echo repro-marker"),
"orphaned end should not replace the active exploring cell: {active:?}"
);
}
#[tokio::test]
async fn exec_end_without_begin_flushes_completed_unrelated_exploring_cell() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
chat.on_task_started();
let begin_ls = begin_exec(&mut chat, "call-ls", "ls -la");
end_exec(&mut chat, begin_ls, "", "", 0);
assert!(drain_insert_history(&mut rx).is_empty());
assert!(active_blob(&chat).contains("ls -la"));
let orphan = begin_unified_exec_startup(&mut chat, "call-after", "proc-1", "echo after");
end_exec(&mut chat, orphan, "after\n", "", 0);
let cells = drain_insert_history(&mut rx);
assert_eq!(
cells.len(),
2,
"completed exploring cell should flush before the orphan entry"
);
let first = lines_to_single_string(&cells[0]);
let second = lines_to_single_string(&cells[1]);
assert!(
first.contains("• Explored"),
"expected flushed exploring cell: {first:?}"
);
assert!(
first.contains("List ls -la"),
"expected flushed exploring cell: {first:?}"
);
assert!(
second.contains("• Ran echo after"),
"expected orphan end entry after flush: {second:?}"
);
assert!(
chat.active_cell.is_none(),
"both entries should be finalized"
);
}
#[tokio::test]
async fn overlapping_exploring_exec_end_is_not_misclassified_as_orphan() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
let begin_ls = begin_exec(&mut chat, "call-ls", "ls -la");
let begin_cat = begin_exec(&mut chat, "call-cat", "cat foo.txt");
assert!(drain_insert_history(&mut rx).is_empty());
end_exec(&mut chat, begin_ls, "foo.txt\n", "", 0);
let cells = drain_insert_history(&mut rx);
assert!(
cells.is_empty(),
"tracked end inside an exploring cell should not render as an orphan"
);
let active = active_blob(&chat);
assert!(
active.contains("List ls -la"),
"expected first command still grouped: {active:?}"
);
assert!(
active.contains("Read foo.txt"),
"expected second running command to stay in the same active cell: {active:?}"
);
assert!(
active.contains("• Exploring"),
"expected grouped exploring header to remain active: {active:?}"
);
end_exec(&mut chat, begin_cat, "hello\n", "", 0);
}
#[tokio::test]
async fn exec_history_shows_unified_exec_startup_commands() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
@ -3575,6 +3683,29 @@ async fn exec_history_shows_unified_exec_tool_calls() {
assert_eq!(blob, "• Explored\n └ List ls\n");
}
#[tokio::test]
async fn unified_exec_unknown_end_with_active_exploring_cell_snapshot() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
chat.on_task_started();
begin_exec(&mut chat, "call-exploring", "cat /dev/null");
let orphan =
begin_unified_exec_startup(&mut chat, "call-orphan", "proc-1", "echo repro-marker");
end_exec(&mut chat, orphan, "repro-marker\n", "", 0);
let cells = drain_insert_history(&mut rx);
let history = cells
.iter()
.map(|lines| lines_to_single_string(lines))
.collect::<String>();
let active = active_blob(&chat);
let snapshot = format!("History:\n{history}\nActive:\n{active}");
assert_snapshot!(
"unified_exec_unknown_end_with_active_exploring_cell",
snapshot
);
}
#[tokio::test]
async fn unified_exec_end_after_task_complete_is_suppressed() {
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;

View file

@ -1,3 +1,10 @@
//! Data model for grouped exec-call history cells in the TUI transcript.
//!
//! An `ExecCell` can represent either a single command or an "exploring" group of related read/
//! list/search commands. The chat widget relies on stable `call_id` matching to route progress and
//! end events into the right cell, and it treats "call id not found" as a real signal (for
//! example, an orphan end that should render as a separate history entry).
use std::time::Duration;
use std::time::Instant;
@ -67,17 +74,24 @@ impl ExecCell {
}
}
/// Marks the most recently matching call as finished and returns whether a call was found.
///
/// Callers should treat `false` as a routing mismatch rather than silently ignoring it. The
/// chat widget uses that signal to avoid attaching an orphan `exec_end` event to an unrelated
/// active exploring cell, which would incorrectly collapse two transcript entries together.
pub(crate) fn complete_call(
&mut self,
call_id: &str,
output: CommandOutput,
duration: Duration,
) {
if let Some(call) = self.calls.iter_mut().rev().find(|c| c.call_id == call_id) {
call.output = Some(output);
call.duration = Some(duration);
call.start_time = None;
}
) -> bool {
let Some(call) = self.calls.iter_mut().rev().find(|c| c.call_id == call_id) else {
return false;
};
call.output = Some(output);
call.duration = Some(duration);
call.start_time = None;
true
}
pub(crate) fn should_flush(&self) -> bool {