From 0a0caa9df266ebc124d524ee6ad23ee6513fe501 Mon Sep 17 00:00:00 2001 From: jif-oai Date: Sun, 22 Feb 2026 14:26:58 +0000 Subject: [PATCH] Handle orphan exec ends without clobbering active exploring cell (#12313) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Summary - distinguish exec end handling targets (active tracking, active orphan history, new cell) so unified exec responses don’t clobber unrelated exploring cells - ensure orphan ends flush existing exploring history when complete, insert standalone history entries, and keep active cells correct - add regression tests plus a snapshot covering the new behavior and expose the ExecCell completion result for verification Fix for https://github.com/openai/codex/issues/12278 --------- Co-authored-by: Josh McKinney --- codex-rs/tui/src/chatwidget.rs | 143 +++++++++++++----- ...nknown_end_with_active_exploring_cell.snap | 11 ++ codex-rs/tui/src/chatwidget/tests.rs | 131 ++++++++++++++++ codex-rs/tui/src/exec_cell/model.rs | 26 +++- 4 files changed, 266 insertions(+), 45 deletions(-) create mode 100644 codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__unified_exec_unknown_end_with_active_exploring_cell.snap diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 400713b22..309ce80ce 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -2357,7 +2357,25 @@ impl ChatWidget { elapsed } + /// Finalizes an exec call while preserving the active exec cell grouping contract. + /// + /// Exec begin/end events usually pair through `running_commands`, but unified exec can emit an + /// end event for a call that was never materialized as the current active `ExecCell` (for + /// example, when another exploring group is still active). In that case we render the end as a + /// standalone history entry instead of replacing or flushing the unrelated active exploring + /// cell. If this method treated every unknown end as "complete the active cell", the UI could + /// merge unrelated commands and hide still-running exploring work. pub(crate) fn handle_exec_end_now(&mut self, ev: ExecCommandEndEvent) { + enum ExecEndTarget { + // Normal case: the active exec cell already tracks this call id. + ActiveTracked, + // We have an active exec group, but it does not contain this call id. Render the end + // as a standalone finalized history cell so the active group remains intact. + OrphanHistoryWhileActiveExec, + // No active exec cell can safely own this end; build a new cell from the end payload. + NewCell, + } + let running = self.running_commands.remove(&ev.call_id); if self.suppressed_exec_calls.remove(&ev.call_id) { return; @@ -2368,49 +2386,96 @@ impl ChatWidget { }; let is_unified_exec_interaction = matches!(source, ExecCommandSource::UnifiedExecInteraction); - - let needs_new = self - .active_cell - .as_ref() - .map(|cell| cell.as_any().downcast_ref::().is_none()) - .unwrap_or(true); - if needs_new { - self.flush_active_cell(); - self.active_cell = Some(Box::new(new_active_exec_command( - ev.call_id.clone(), - command, - parsed, - source, - ev.interaction_input.clone(), - self.config.animations, - ))); - } - - if let Some(cell) = self - .active_cell - .as_mut() - .and_then(|c| c.as_any_mut().downcast_mut::()) - { - let output = if is_unified_exec_interaction { - CommandOutput { - exit_code: ev.exit_code, - formatted_output: String::new(), - aggregated_output: String::new(), + let end_target = match self.active_cell.as_ref() { + Some(cell) => match cell.as_any().downcast_ref::() { + Some(exec_cell) + if exec_cell + .iter_calls() + .any(|call| call.call_id == ev.call_id) => + { + ExecEndTarget::ActiveTracked } - } else { - CommandOutput { - exit_code: ev.exit_code, - formatted_output: ev.formatted_output.clone(), - aggregated_output: ev.aggregated_output.clone(), + Some(exec_cell) if exec_cell.is_active() => { + ExecEndTarget::OrphanHistoryWhileActiveExec } - }; - cell.complete_call(&ev.call_id, output, ev.duration); - if cell.should_flush() { - self.flush_active_cell(); - } else { - self.bump_active_cell_revision(); + Some(_) | None => ExecEndTarget::NewCell, + }, + None => ExecEndTarget::NewCell, + }; + + // Unified exec interaction rows intentionally hide command output text in the exec cell and + // instead render the interaction-specific content elsewhere in the UI. + let output = if is_unified_exec_interaction { + CommandOutput { + exit_code: ev.exit_code, + formatted_output: String::new(), + aggregated_output: String::new(), + } + } else { + CommandOutput { + exit_code: ev.exit_code, + formatted_output: ev.formatted_output.clone(), + aggregated_output: ev.aggregated_output.clone(), + } + }; + + match end_target { + ExecEndTarget::ActiveTracked => { + if let Some(cell) = self + .active_cell + .as_mut() + .and_then(|c| c.as_any_mut().downcast_mut::()) + { + let completed = cell.complete_call(&ev.call_id, output, ev.duration); + debug_assert!(completed, "active exec cell should contain {}", ev.call_id); + if cell.should_flush() { + self.flush_active_cell(); + } else { + self.bump_active_cell_revision(); + self.request_redraw(); + } + } + } + ExecEndTarget::OrphanHistoryWhileActiveExec => { + let mut orphan = new_active_exec_command( + ev.call_id.clone(), + command, + parsed, + source, + ev.interaction_input.clone(), + self.config.animations, + ); + let completed = orphan.complete_call(&ev.call_id, output, ev.duration); + debug_assert!( + completed, + "new orphan exec cell should contain {}", + ev.call_id + ); + self.needs_final_message_separator = true; + self.app_event_tx + .send(AppEvent::InsertHistoryCell(Box::new(orphan))); self.request_redraw(); } + ExecEndTarget::NewCell => { + self.flush_active_cell(); + let mut cell = new_active_exec_command( + ev.call_id.clone(), + command, + parsed, + source, + ev.interaction_input.clone(), + self.config.animations, + ); + let completed = cell.complete_call(&ev.call_id, output, ev.duration); + debug_assert!(completed, "new exec cell should contain {}", ev.call_id); + if cell.should_flush() { + self.add_to_history(cell); + } else { + self.active_cell = Some(Box::new(cell)); + self.bump_active_cell_revision(); + self.request_redraw(); + } + } } // Mark that actual work was done (command executed) self.had_work_activity = true; diff --git a/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__unified_exec_unknown_end_with_active_exploring_cell.snap b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__unified_exec_unknown_end_with_active_exploring_cell.snap new file mode 100644 index 000000000..85259b0b1 --- /dev/null +++ b/codex-rs/tui/src/chatwidget/snapshots/codex_tui__chatwidget__tests__unified_exec_unknown_end_with_active_exploring_cell.snap @@ -0,0 +1,11 @@ +--- +source: tui/src/chatwidget/tests.rs +expression: snapshot +--- +History: +• Ran echo repro-marker + └ repro-marker + +Active: +• Exploring + └ Read null diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index f1f0adf2a..d7424f720 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -3531,6 +3531,114 @@ async fn exec_end_without_begin_uses_event_command() { ); } +#[tokio::test] +async fn exec_end_without_begin_does_not_flush_unrelated_running_exploring_cell() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + chat.on_task_started(); + + begin_exec(&mut chat, "call-exploring", "cat /dev/null"); + assert!(drain_insert_history(&mut rx).is_empty()); + assert!(active_blob(&chat).contains("Read null")); + + let orphan = + begin_unified_exec_startup(&mut chat, "call-orphan", "proc-1", "echo repro-marker"); + assert!(drain_insert_history(&mut rx).is_empty()); + + end_exec(&mut chat, orphan, "repro-marker\n", "", 0); + + let cells = drain_insert_history(&mut rx); + assert_eq!(cells.len(), 1, "only the orphan end should be inserted"); + let orphan_blob = lines_to_single_string(&cells[0]); + assert!( + orphan_blob.contains("• Ran echo repro-marker"), + "expected orphan end to render a standalone entry: {orphan_blob:?}" + ); + let active = active_blob(&chat); + assert!( + active.contains("• Exploring"), + "expected unrelated exploring call to remain active: {active:?}" + ); + assert!( + active.contains("Read null"), + "expected active exploring command to remain visible: {active:?}" + ); + assert!( + !active.contains("echo repro-marker"), + "orphaned end should not replace the active exploring cell: {active:?}" + ); +} + +#[tokio::test] +async fn exec_end_without_begin_flushes_completed_unrelated_exploring_cell() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + chat.on_task_started(); + + let begin_ls = begin_exec(&mut chat, "call-ls", "ls -la"); + end_exec(&mut chat, begin_ls, "", "", 0); + assert!(drain_insert_history(&mut rx).is_empty()); + assert!(active_blob(&chat).contains("ls -la")); + + let orphan = begin_unified_exec_startup(&mut chat, "call-after", "proc-1", "echo after"); + end_exec(&mut chat, orphan, "after\n", "", 0); + + let cells = drain_insert_history(&mut rx); + assert_eq!( + cells.len(), + 2, + "completed exploring cell should flush before the orphan entry" + ); + let first = lines_to_single_string(&cells[0]); + let second = lines_to_single_string(&cells[1]); + assert!( + first.contains("• Explored"), + "expected flushed exploring cell: {first:?}" + ); + assert!( + first.contains("List ls -la"), + "expected flushed exploring cell: {first:?}" + ); + assert!( + second.contains("• Ran echo after"), + "expected orphan end entry after flush: {second:?}" + ); + assert!( + chat.active_cell.is_none(), + "both entries should be finalized" + ); +} + +#[tokio::test] +async fn overlapping_exploring_exec_end_is_not_misclassified_as_orphan() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + + let begin_ls = begin_exec(&mut chat, "call-ls", "ls -la"); + let begin_cat = begin_exec(&mut chat, "call-cat", "cat foo.txt"); + assert!(drain_insert_history(&mut rx).is_empty()); + + end_exec(&mut chat, begin_ls, "foo.txt\n", "", 0); + + let cells = drain_insert_history(&mut rx); + assert!( + cells.is_empty(), + "tracked end inside an exploring cell should not render as an orphan" + ); + let active = active_blob(&chat); + assert!( + active.contains("List ls -la"), + "expected first command still grouped: {active:?}" + ); + assert!( + active.contains("Read foo.txt"), + "expected second running command to stay in the same active cell: {active:?}" + ); + assert!( + active.contains("• Exploring"), + "expected grouped exploring header to remain active: {active:?}" + ); + + end_exec(&mut chat, begin_cat, "hello\n", "", 0); +} + #[tokio::test] async fn exec_history_shows_unified_exec_startup_commands() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; @@ -3575,6 +3683,29 @@ async fn exec_history_shows_unified_exec_tool_calls() { assert_eq!(blob, "• Explored\n └ List ls\n"); } +#[tokio::test] +async fn unified_exec_unknown_end_with_active_exploring_cell_snapshot() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + chat.on_task_started(); + + begin_exec(&mut chat, "call-exploring", "cat /dev/null"); + let orphan = + begin_unified_exec_startup(&mut chat, "call-orphan", "proc-1", "echo repro-marker"); + end_exec(&mut chat, orphan, "repro-marker\n", "", 0); + + let cells = drain_insert_history(&mut rx); + let history = cells + .iter() + .map(|lines| lines_to_single_string(lines)) + .collect::(); + let active = active_blob(&chat); + let snapshot = format!("History:\n{history}\nActive:\n{active}"); + assert_snapshot!( + "unified_exec_unknown_end_with_active_exploring_cell", + snapshot + ); +} + #[tokio::test] async fn unified_exec_end_after_task_complete_is_suppressed() { let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; diff --git a/codex-rs/tui/src/exec_cell/model.rs b/codex-rs/tui/src/exec_cell/model.rs index f08b088ef..878d42c71 100644 --- a/codex-rs/tui/src/exec_cell/model.rs +++ b/codex-rs/tui/src/exec_cell/model.rs @@ -1,3 +1,10 @@ +//! Data model for grouped exec-call history cells in the TUI transcript. +//! +//! An `ExecCell` can represent either a single command or an "exploring" group of related read/ +//! list/search commands. The chat widget relies on stable `call_id` matching to route progress and +//! end events into the right cell, and it treats "call id not found" as a real signal (for +//! example, an orphan end that should render as a separate history entry). + use std::time::Duration; use std::time::Instant; @@ -67,17 +74,24 @@ impl ExecCell { } } + /// Marks the most recently matching call as finished and returns whether a call was found. + /// + /// Callers should treat `false` as a routing mismatch rather than silently ignoring it. The + /// chat widget uses that signal to avoid attaching an orphan `exec_end` event to an unrelated + /// active exploring cell, which would incorrectly collapse two transcript entries together. pub(crate) fn complete_call( &mut self, call_id: &str, output: CommandOutput, duration: Duration, - ) { - if let Some(call) = self.calls.iter_mut().rev().find(|c| c.call_id == call_id) { - call.output = Some(output); - call.duration = Some(duration); - call.start_time = None; - } + ) -> bool { + let Some(call) = self.calls.iter_mut().rev().find(|c| c.call_id == call_id) else { + return false; + }; + call.output = Some(output); + call.duration = Some(duration); + call.start_time = None; + true } pub(crate) fn should_flush(&self) -> bool {