Handle orphan exec ends without clobbering active exploring cell (#12313)
Summary - distinguish exec end handling targets (active tracking, active orphan history, new cell) so unified exec responses don’t clobber unrelated exploring cells - ensure orphan ends flush existing exploring history when complete, insert standalone history entries, and keep active cells correct - add regression tests plus a snapshot covering the new behavior and expose the ExecCell completion result for verification Fix for https://github.com/openai/codex/issues/12278 --------- Co-authored-by: Josh McKinney <joshka@openai.com>
This commit is contained in:
parent
4666a6e631
commit
0a0caa9df2
4 changed files with 266 additions and 45 deletions
|
|
@ -2357,7 +2357,25 @@ impl ChatWidget {
|
|||
elapsed
|
||||
}
|
||||
|
||||
/// Finalizes an exec call while preserving the active exec cell grouping contract.
|
||||
///
|
||||
/// Exec begin/end events usually pair through `running_commands`, but unified exec can emit an
|
||||
/// end event for a call that was never materialized as the current active `ExecCell` (for
|
||||
/// example, when another exploring group is still active). In that case we render the end as a
|
||||
/// standalone history entry instead of replacing or flushing the unrelated active exploring
|
||||
/// cell. If this method treated every unknown end as "complete the active cell", the UI could
|
||||
/// merge unrelated commands and hide still-running exploring work.
|
||||
pub(crate) fn handle_exec_end_now(&mut self, ev: ExecCommandEndEvent) {
|
||||
enum ExecEndTarget {
|
||||
// Normal case: the active exec cell already tracks this call id.
|
||||
ActiveTracked,
|
||||
// We have an active exec group, but it does not contain this call id. Render the end
|
||||
// as a standalone finalized history cell so the active group remains intact.
|
||||
OrphanHistoryWhileActiveExec,
|
||||
// No active exec cell can safely own this end; build a new cell from the end payload.
|
||||
NewCell,
|
||||
}
|
||||
|
||||
let running = self.running_commands.remove(&ev.call_id);
|
||||
if self.suppressed_exec_calls.remove(&ev.call_id) {
|
||||
return;
|
||||
|
|
@ -2368,49 +2386,96 @@ impl ChatWidget {
|
|||
};
|
||||
let is_unified_exec_interaction =
|
||||
matches!(source, ExecCommandSource::UnifiedExecInteraction);
|
||||
|
||||
let needs_new = self
|
||||
.active_cell
|
||||
.as_ref()
|
||||
.map(|cell| cell.as_any().downcast_ref::<ExecCell>().is_none())
|
||||
.unwrap_or(true);
|
||||
if needs_new {
|
||||
self.flush_active_cell();
|
||||
self.active_cell = Some(Box::new(new_active_exec_command(
|
||||
ev.call_id.clone(),
|
||||
command,
|
||||
parsed,
|
||||
source,
|
||||
ev.interaction_input.clone(),
|
||||
self.config.animations,
|
||||
)));
|
||||
}
|
||||
|
||||
if let Some(cell) = self
|
||||
.active_cell
|
||||
.as_mut()
|
||||
.and_then(|c| c.as_any_mut().downcast_mut::<ExecCell>())
|
||||
{
|
||||
let output = if is_unified_exec_interaction {
|
||||
CommandOutput {
|
||||
exit_code: ev.exit_code,
|
||||
formatted_output: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
let end_target = match self.active_cell.as_ref() {
|
||||
Some(cell) => match cell.as_any().downcast_ref::<ExecCell>() {
|
||||
Some(exec_cell)
|
||||
if exec_cell
|
||||
.iter_calls()
|
||||
.any(|call| call.call_id == ev.call_id) =>
|
||||
{
|
||||
ExecEndTarget::ActiveTracked
|
||||
}
|
||||
} else {
|
||||
CommandOutput {
|
||||
exit_code: ev.exit_code,
|
||||
formatted_output: ev.formatted_output.clone(),
|
||||
aggregated_output: ev.aggregated_output.clone(),
|
||||
Some(exec_cell) if exec_cell.is_active() => {
|
||||
ExecEndTarget::OrphanHistoryWhileActiveExec
|
||||
}
|
||||
};
|
||||
cell.complete_call(&ev.call_id, output, ev.duration);
|
||||
if cell.should_flush() {
|
||||
self.flush_active_cell();
|
||||
} else {
|
||||
self.bump_active_cell_revision();
|
||||
Some(_) | None => ExecEndTarget::NewCell,
|
||||
},
|
||||
None => ExecEndTarget::NewCell,
|
||||
};
|
||||
|
||||
// Unified exec interaction rows intentionally hide command output text in the exec cell and
|
||||
// instead render the interaction-specific content elsewhere in the UI.
|
||||
let output = if is_unified_exec_interaction {
|
||||
CommandOutput {
|
||||
exit_code: ev.exit_code,
|
||||
formatted_output: String::new(),
|
||||
aggregated_output: String::new(),
|
||||
}
|
||||
} else {
|
||||
CommandOutput {
|
||||
exit_code: ev.exit_code,
|
||||
formatted_output: ev.formatted_output.clone(),
|
||||
aggregated_output: ev.aggregated_output.clone(),
|
||||
}
|
||||
};
|
||||
|
||||
match end_target {
|
||||
ExecEndTarget::ActiveTracked => {
|
||||
if let Some(cell) = self
|
||||
.active_cell
|
||||
.as_mut()
|
||||
.and_then(|c| c.as_any_mut().downcast_mut::<ExecCell>())
|
||||
{
|
||||
let completed = cell.complete_call(&ev.call_id, output, ev.duration);
|
||||
debug_assert!(completed, "active exec cell should contain {}", ev.call_id);
|
||||
if cell.should_flush() {
|
||||
self.flush_active_cell();
|
||||
} else {
|
||||
self.bump_active_cell_revision();
|
||||
self.request_redraw();
|
||||
}
|
||||
}
|
||||
}
|
||||
ExecEndTarget::OrphanHistoryWhileActiveExec => {
|
||||
let mut orphan = new_active_exec_command(
|
||||
ev.call_id.clone(),
|
||||
command,
|
||||
parsed,
|
||||
source,
|
||||
ev.interaction_input.clone(),
|
||||
self.config.animations,
|
||||
);
|
||||
let completed = orphan.complete_call(&ev.call_id, output, ev.duration);
|
||||
debug_assert!(
|
||||
completed,
|
||||
"new orphan exec cell should contain {}",
|
||||
ev.call_id
|
||||
);
|
||||
self.needs_final_message_separator = true;
|
||||
self.app_event_tx
|
||||
.send(AppEvent::InsertHistoryCell(Box::new(orphan)));
|
||||
self.request_redraw();
|
||||
}
|
||||
ExecEndTarget::NewCell => {
|
||||
self.flush_active_cell();
|
||||
let mut cell = new_active_exec_command(
|
||||
ev.call_id.clone(),
|
||||
command,
|
||||
parsed,
|
||||
source,
|
||||
ev.interaction_input.clone(),
|
||||
self.config.animations,
|
||||
);
|
||||
let completed = cell.complete_call(&ev.call_id, output, ev.duration);
|
||||
debug_assert!(completed, "new exec cell should contain {}", ev.call_id);
|
||||
if cell.should_flush() {
|
||||
self.add_to_history(cell);
|
||||
} else {
|
||||
self.active_cell = Some(Box::new(cell));
|
||||
self.bump_active_cell_revision();
|
||||
self.request_redraw();
|
||||
}
|
||||
}
|
||||
}
|
||||
// Mark that actual work was done (command executed)
|
||||
self.had_work_activity = true;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
source: tui/src/chatwidget/tests.rs
|
||||
expression: snapshot
|
||||
---
|
||||
History:
|
||||
• Ran echo repro-marker
|
||||
└ repro-marker
|
||||
|
||||
Active:
|
||||
• Exploring
|
||||
└ Read null
|
||||
|
|
@ -3531,6 +3531,114 @@ async fn exec_end_without_begin_uses_event_command() {
|
|||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn exec_end_without_begin_does_not_flush_unrelated_running_exploring_cell() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
chat.on_task_started();
|
||||
|
||||
begin_exec(&mut chat, "call-exploring", "cat /dev/null");
|
||||
assert!(drain_insert_history(&mut rx).is_empty());
|
||||
assert!(active_blob(&chat).contains("Read null"));
|
||||
|
||||
let orphan =
|
||||
begin_unified_exec_startup(&mut chat, "call-orphan", "proc-1", "echo repro-marker");
|
||||
assert!(drain_insert_history(&mut rx).is_empty());
|
||||
|
||||
end_exec(&mut chat, orphan, "repro-marker\n", "", 0);
|
||||
|
||||
let cells = drain_insert_history(&mut rx);
|
||||
assert_eq!(cells.len(), 1, "only the orphan end should be inserted");
|
||||
let orphan_blob = lines_to_single_string(&cells[0]);
|
||||
assert!(
|
||||
orphan_blob.contains("• Ran echo repro-marker"),
|
||||
"expected orphan end to render a standalone entry: {orphan_blob:?}"
|
||||
);
|
||||
let active = active_blob(&chat);
|
||||
assert!(
|
||||
active.contains("• Exploring"),
|
||||
"expected unrelated exploring call to remain active: {active:?}"
|
||||
);
|
||||
assert!(
|
||||
active.contains("Read null"),
|
||||
"expected active exploring command to remain visible: {active:?}"
|
||||
);
|
||||
assert!(
|
||||
!active.contains("echo repro-marker"),
|
||||
"orphaned end should not replace the active exploring cell: {active:?}"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn exec_end_without_begin_flushes_completed_unrelated_exploring_cell() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
chat.on_task_started();
|
||||
|
||||
let begin_ls = begin_exec(&mut chat, "call-ls", "ls -la");
|
||||
end_exec(&mut chat, begin_ls, "", "", 0);
|
||||
assert!(drain_insert_history(&mut rx).is_empty());
|
||||
assert!(active_blob(&chat).contains("ls -la"));
|
||||
|
||||
let orphan = begin_unified_exec_startup(&mut chat, "call-after", "proc-1", "echo after");
|
||||
end_exec(&mut chat, orphan, "after\n", "", 0);
|
||||
|
||||
let cells = drain_insert_history(&mut rx);
|
||||
assert_eq!(
|
||||
cells.len(),
|
||||
2,
|
||||
"completed exploring cell should flush before the orphan entry"
|
||||
);
|
||||
let first = lines_to_single_string(&cells[0]);
|
||||
let second = lines_to_single_string(&cells[1]);
|
||||
assert!(
|
||||
first.contains("• Explored"),
|
||||
"expected flushed exploring cell: {first:?}"
|
||||
);
|
||||
assert!(
|
||||
first.contains("List ls -la"),
|
||||
"expected flushed exploring cell: {first:?}"
|
||||
);
|
||||
assert!(
|
||||
second.contains("• Ran echo after"),
|
||||
"expected orphan end entry after flush: {second:?}"
|
||||
);
|
||||
assert!(
|
||||
chat.active_cell.is_none(),
|
||||
"both entries should be finalized"
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn overlapping_exploring_exec_end_is_not_misclassified_as_orphan() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
|
||||
let begin_ls = begin_exec(&mut chat, "call-ls", "ls -la");
|
||||
let begin_cat = begin_exec(&mut chat, "call-cat", "cat foo.txt");
|
||||
assert!(drain_insert_history(&mut rx).is_empty());
|
||||
|
||||
end_exec(&mut chat, begin_ls, "foo.txt\n", "", 0);
|
||||
|
||||
let cells = drain_insert_history(&mut rx);
|
||||
assert!(
|
||||
cells.is_empty(),
|
||||
"tracked end inside an exploring cell should not render as an orphan"
|
||||
);
|
||||
let active = active_blob(&chat);
|
||||
assert!(
|
||||
active.contains("List ls -la"),
|
||||
"expected first command still grouped: {active:?}"
|
||||
);
|
||||
assert!(
|
||||
active.contains("Read foo.txt"),
|
||||
"expected second running command to stay in the same active cell: {active:?}"
|
||||
);
|
||||
assert!(
|
||||
active.contains("• Exploring"),
|
||||
"expected grouped exploring header to remain active: {active:?}"
|
||||
);
|
||||
|
||||
end_exec(&mut chat, begin_cat, "hello\n", "", 0);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn exec_history_shows_unified_exec_startup_commands() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
|
|
@ -3575,6 +3683,29 @@ async fn exec_history_shows_unified_exec_tool_calls() {
|
|||
assert_eq!(blob, "• Explored\n └ List ls\n");
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unified_exec_unknown_end_with_active_exploring_cell_snapshot() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
chat.on_task_started();
|
||||
|
||||
begin_exec(&mut chat, "call-exploring", "cat /dev/null");
|
||||
let orphan =
|
||||
begin_unified_exec_startup(&mut chat, "call-orphan", "proc-1", "echo repro-marker");
|
||||
end_exec(&mut chat, orphan, "repro-marker\n", "", 0);
|
||||
|
||||
let cells = drain_insert_history(&mut rx);
|
||||
let history = cells
|
||||
.iter()
|
||||
.map(|lines| lines_to_single_string(lines))
|
||||
.collect::<String>();
|
||||
let active = active_blob(&chat);
|
||||
let snapshot = format!("History:\n{history}\nActive:\n{active}");
|
||||
assert_snapshot!(
|
||||
"unified_exec_unknown_end_with_active_exploring_cell",
|
||||
snapshot
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn unified_exec_end_after_task_complete_is_suppressed() {
|
||||
let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,10 @@
|
|||
//! Data model for grouped exec-call history cells in the TUI transcript.
|
||||
//!
|
||||
//! An `ExecCell` can represent either a single command or an "exploring" group of related read/
|
||||
//! list/search commands. The chat widget relies on stable `call_id` matching to route progress and
|
||||
//! end events into the right cell, and it treats "call id not found" as a real signal (for
|
||||
//! example, an orphan end that should render as a separate history entry).
|
||||
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
|
|
@ -67,17 +74,24 @@ impl ExecCell {
|
|||
}
|
||||
}
|
||||
|
||||
/// Marks the most recently matching call as finished and returns whether a call was found.
|
||||
///
|
||||
/// Callers should treat `false` as a routing mismatch rather than silently ignoring it. The
|
||||
/// chat widget uses that signal to avoid attaching an orphan `exec_end` event to an unrelated
|
||||
/// active exploring cell, which would incorrectly collapse two transcript entries together.
|
||||
pub(crate) fn complete_call(
|
||||
&mut self,
|
||||
call_id: &str,
|
||||
output: CommandOutput,
|
||||
duration: Duration,
|
||||
) {
|
||||
if let Some(call) = self.calls.iter_mut().rev().find(|c| c.call_id == call_id) {
|
||||
call.output = Some(output);
|
||||
call.duration = Some(duration);
|
||||
call.start_time = None;
|
||||
}
|
||||
) -> bool {
|
||||
let Some(call) = self.calls.iter_mut().rev().find(|c| c.call_id == call_id) else {
|
||||
return false;
|
||||
};
|
||||
call.output = Some(output);
|
||||
call.duration = Some(duration);
|
||||
call.start_time = None;
|
||||
true
|
||||
}
|
||||
|
||||
pub(crate) fn should_flush(&self) -> bool {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue