perf(tui2): cache transcript view rendering (#8693)

The transcript viewport draws every frame. Ratatui's Line::render_ref
does grapheme segmentation and span layout, so repeated redraws can burn
CPU during streaming even when the visible transcript hasn't changed.

Introduce TranscriptViewCache to reduce per-frame work:
- WrappedTranscriptCache memoizes flattened+wrapped transcript lines per
width, appends incrementally as new cells arrive, and rebuilds on width
change, truncation (backtrack), or transcript replacement.
- TranscriptRasterCache caches rasterized rows (Vec<Cell>) per line
index and user-row styling; redraws copy cells instead of rerendering
spans.

The caches are width-scoped and store base transcript content only;
selection highlighting and copy affordances are applied after drawing.
User rows include the row-wide base style in the cached raster.

Refactor transcript_render to expose append_wrapped_transcript_cell for
incremental building and add a test that incremental append matches the
full build.

Add docs/tui2/performance-testing.md as a playbook for macOS sample
profiles and hotspot greps.

Expand transcript_view_cache tests to cover rebuild conditions, raster
equivalence vs direct rendering, user-row caching, and eviction.

Test: cargo test -p codex-tui2
This commit is contained in:
Josh McKinney 2026-01-03 11:44:27 -08:00 committed by GitHub
parent ee9d441777
commit 90f37e8549
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 1359 additions and 124 deletions

View file

@ -1,5 +1,13 @@
use crate::color::perceptual_distance;
use ratatui::style::Color;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
/// Process-wide palette version counter; starts at 0 and is advanced by
/// `bump_palette_version()`.
static DEFAULT_PALETTE_VERSION: AtomicU64 = AtomicU64::new(0);
/// Advances [`DEFAULT_PALETTE_VERSION`] by one.
fn bump_palette_version() {
// NOTE(review): `Relaxed` only guarantees the increment itself is atomic; confirm that no
// reader relies on this counter to order access to the refreshed color data.
DEFAULT_PALETTE_VERSION.fetch_add(1, Ordering::Relaxed);
}
/// Returns the closest color to the target color that the terminal can display.
pub fn best_color(target: (u8, u8, u8)) -> Color {
@ -27,6 +35,7 @@ pub fn best_color(target: (u8, u8, u8)) -> Color {
/// Re-runs the default-color query through the cfg-selected `imp` module, then advances the
/// palette version so code holding an earlier `palette_version()` value can detect the re-query.
pub fn requery_default_colors() {
imp::requery_default_colors();
// The version is bumped unconditionally, and only after the re-query call has returned.
bump_palette_version();
}
#[derive(Clone, Copy)]
@ -47,6 +56,14 @@ pub fn default_bg() -> Option<(u8, u8, u8)> {
default_colors().map(|c| c.bg)
}
/// Returns a counter that increments every time `requery_default_colors()` is called, so
/// cached renderers can tell when their styling assumptions (e.g. background colors baked
/// into cached transcript rows) may be stale and need invalidation.
///
/// The increment is unconditional: a changed value means a re-query was requested, not that
/// the re-query succeeded or that any color actually changed.
// NOTE(review): presumably `dead_code` is allowed because nothing in this crate reads the
// version yet — confirm, and drop the attribute once a caller exists.
#[allow(dead_code)]
pub fn palette_version() -> u64 {
DEFAULT_PALETTE_VERSION.load(Ordering::Relaxed)
}
#[cfg(all(unix, not(test)))]
mod imp {
use super::DefaultColors;

View file

@ -22,6 +22,7 @@ use crate::transcript_multi_click::TranscriptMultiClick;
use crate::transcript_selection::TRANSCRIPT_GUTTER_COLS;
use crate::transcript_selection::TranscriptSelection;
use crate::transcript_selection::TranscriptSelectionPoint;
use crate::transcript_view_cache::TranscriptViewCache;
use crate::tui;
use crate::tui::TuiEvent;
use crate::tui::scrolling::MouseScrollState;
@ -29,7 +30,6 @@ use crate::tui::scrolling::ScrollConfig;
use crate::tui::scrolling::ScrollConfigOverrides;
use crate::tui::scrolling::ScrollDirection;
use crate::tui::scrolling::ScrollUpdate;
use crate::tui::scrolling::TranscriptLineMeta;
use crate::tui::scrolling::TranscriptScroll;
use crate::update_action::UpdateAction;
use codex_ansi_escape::ansi_escape_line;
@ -326,6 +326,7 @@ pub(crate) struct App {
pub(crate) file_search: FileSearchManager,
pub(crate) transcript_cells: Vec<Arc<dyn HistoryCell>>,
transcript_view_cache: TranscriptViewCache,
#[allow(dead_code)]
transcript_scroll: TranscriptScroll,
@ -492,6 +493,7 @@ impl App {
file_search,
enhanced_keys_supported,
transcript_cells: Vec::new(),
transcript_view_cache: TranscriptViewCache::new(),
transcript_scroll: TranscriptScroll::default(),
transcript_selection: TranscriptSelection::default(),
transcript_multi_click: TranscriptMultiClick::default(),
@ -707,10 +709,10 @@ impl App {
height: max_transcript_height,
};
let transcript =
crate::transcript_render::build_wrapped_transcript_lines(cells, transcript_area.width);
let (lines, line_meta) = (transcript.lines, transcript.meta);
if lines.is_empty() {
self.transcript_view_cache
.ensure_wrapped(cells, transcript_area.width);
let total_lines = self.transcript_view_cache.lines().len();
if total_lines == 0 {
Clear.render_ref(transcript_area, frame.buffer);
self.transcript_scroll = TranscriptScroll::default();
self.transcript_view_top = 0;
@ -718,17 +720,14 @@ impl App {
return area.y;
}
let is_user_cell: Vec<bool> = cells
.iter()
.map(|c| c.as_any().is::<UserHistoryCell>())
.collect();
let total_lines = lines.len();
self.transcript_total_lines = total_lines;
let max_visible = std::cmp::min(max_transcript_height as usize, total_lines);
let max_start = total_lines.saturating_sub(max_visible);
let (scroll_state, top_offset) = self.transcript_scroll.resolve_top(&line_meta, max_start);
let (scroll_state, top_offset) = {
let line_meta = self.transcript_view_cache.line_meta();
self.transcript_scroll.resolve_top(line_meta, max_start)
};
self.transcript_scroll = scroll_state;
self.transcript_view_top = top_offset;
@ -762,6 +761,11 @@ impl App {
height: transcript_visible_height,
};
// Cache a few viewports worth of rasterized rows so redraws during streaming can cheaply
// copy already-rendered `Cell`s instead of re-running grapheme segmentation.
self.transcript_view_cache
.set_raster_capacity(max_visible.saturating_mul(4).max(256));
for (row_index, line_index) in (top_offset..total_lines).enumerate() {
if row_index >= max_visible {
break;
@ -775,21 +779,8 @@ impl App {
height: 1,
};
let is_user_row = line_meta
.get(line_index)
.and_then(TranscriptLineMeta::cell_index)
.map(|cell_index| is_user_cell.get(cell_index).copied().unwrap_or(false))
.unwrap_or(false);
if is_user_row {
let base_style = crate::style::user_message_style();
for x in row_area.x..row_area.right() {
let cell = &mut frame.buffer[(x, y)];
let style = cell.style().patch(base_style);
cell.set_style(style);
}
}
lines[line_index].render_ref(row_area, frame.buffer);
self.transcript_view_cache
.render_row_index_into(line_index, row_area, frame.buffer);
}
self.apply_transcript_selection(transcript_area, frame.buffer);
@ -1102,12 +1093,12 @@ impl App {
return;
}
let transcript =
crate::transcript_render::build_wrapped_transcript_lines(&self.transcript_cells, width);
let line_meta = transcript.meta;
self.transcript_view_cache
.ensure_wrapped(&self.transcript_cells, width);
let line_meta = self.transcript_view_cache.line_meta();
self.transcript_scroll =
self.transcript_scroll
.scrolled_by(delta_lines, &line_meta, visible_lines);
.scrolled_by(delta_lines, line_meta, visible_lines);
if schedule_frame {
// Request a redraw; the frame scheduler coalesces bursts and clamps to 60fps.
@ -1127,9 +1118,10 @@ impl App {
return;
}
let transcript =
crate::transcript_render::build_wrapped_transcript_lines(&self.transcript_cells, width);
let (lines, line_meta) = (transcript.lines, transcript.meta);
self.transcript_view_cache
.ensure_wrapped(&self.transcript_cells, width);
let lines = self.transcript_view_cache.lines();
let line_meta = self.transcript_view_cache.line_meta();
if lines.is_empty() || line_meta.is_empty() {
return;
}
@ -1149,7 +1141,7 @@ impl App {
}
};
if let Some(scroll_state) = TranscriptScroll::anchor_for(&line_meta, top_offset) {
if let Some(scroll_state) = TranscriptScroll::anchor_for(line_meta, top_offset) {
self.transcript_scroll = scroll_state;
}
}
@ -2053,6 +2045,7 @@ mod tests {
use crate::history_cell::UserHistoryCell;
use crate::history_cell::new_session_info;
use crate::transcript_copy_ui::CopySelectionShortcut;
use crate::tui::scrolling::TranscriptLineMeta;
use codex_core::AuthManager;
use codex_core::CodexAuth;
use codex_core::ConversationManager;
@ -2090,6 +2083,7 @@ mod tests {
active_profile: None,
file_search,
transcript_cells: Vec::new(),
transcript_view_cache: TranscriptViewCache::new(),
transcript_scroll: TranscriptScroll::default(),
transcript_selection: TranscriptSelection::default(),
transcript_multi_click: TranscriptMultiClick::default(),
@ -2140,6 +2134,7 @@ mod tests {
active_profile: None,
file_search,
transcript_cells: Vec::new(),
transcript_view_cache: TranscriptViewCache::new(),
transcript_scroll: TranscriptScroll::default(),
transcript_selection: TranscriptSelection::default(),
transcript_multi_click: TranscriptMultiClick::default(),

View file

@ -81,6 +81,7 @@ mod transcript_copy_ui;
mod transcript_multi_click;
mod transcript_render;
mod transcript_selection;
mod transcript_view_cache;
mod tui;
mod ui_consts;
pub mod update_action;

View file

@ -1,5 +1,13 @@
use crate::color::perceptual_distance;
use ratatui::style::Color;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::Ordering;
/// Process-wide palette version counter; starts at 0 and is advanced by
/// `bump_palette_version()`.
static DEFAULT_PALETTE_VERSION: AtomicU64 = AtomicU64::new(0);
/// Advances [`DEFAULT_PALETTE_VERSION`] by one.
fn bump_palette_version() {
// NOTE(review): `Relaxed` only guarantees the increment itself is atomic; confirm that no
// reader relies on this counter to order access to the refreshed color data.
DEFAULT_PALETTE_VERSION.fetch_add(1, Ordering::Relaxed);
}
/// Returns the closest color to the target color that the terminal can display.
pub fn best_color(target: (u8, u8, u8)) -> Color {
@ -27,6 +35,7 @@ pub fn best_color(target: (u8, u8, u8)) -> Color {
/// Re-runs the default-color query through the cfg-selected `imp` module, then advances the
/// palette version so code holding an earlier `palette_version()` value can detect the re-query.
pub fn requery_default_colors() {
imp::requery_default_colors();
// The version is bumped unconditionally, and only after the re-query call has returned.
bump_palette_version();
}
#[derive(Clone, Copy)]
@ -47,6 +56,10 @@ pub fn default_bg() -> Option<(u8, u8, u8)> {
default_colors().map(|c| c.bg)
}
/// Returns the current palette version: a counter that increments every time
/// `requery_default_colors()` is called.
///
/// Cached renderers can store the value they rendered with and compare it later to decide
/// whether color-dependent output needs to be rebuilt. The increment is unconditional, so a
/// changed value means a re-query was requested, not that any color actually changed.
pub fn palette_version() -> u64 {
DEFAULT_PALETTE_VERSION.load(Ordering::Relaxed)
}
#[cfg(all(unix, not(test)))]
mod imp {
use super::DefaultColors;

View file

@ -113,9 +113,6 @@ pub(crate) fn build_wrapped_transcript_lines(
cells: &[Arc<dyn HistoryCell>],
width: u16,
) -> TranscriptLines {
use crate::render::line_utils::line_to_static;
use ratatui::style::Color;
if width == 0 {
return TranscriptLines {
lines: Vec::new(),
@ -124,110 +121,140 @@ pub(crate) fn build_wrapped_transcript_lines(
};
}
let mut transcript = TranscriptLines {
lines: Vec::new(),
meta: Vec::new(),
joiner_before: Vec::new(),
};
let mut has_emitted_lines = false;
let base_opts: crate::wrapping::RtOptions<'_> =
crate::wrapping::RtOptions::new(width.max(1) as usize);
let mut lines: Vec<Line<'static>> = Vec::new();
let mut meta: Vec<TranscriptLineMeta> = Vec::new();
let mut joiner_before: Vec<Option<String>> = Vec::new();
let mut has_emitted_lines = false;
for (cell_index, cell) in cells.iter().enumerate() {
// Start from each cell's transcript view (prefixes/indents already applied), then apply
// viewport wrapping to prose while keeping preformatted content intact.
let rendered = cell.transcript_lines_with_joiners(width);
if rendered.lines.is_empty() {
append_wrapped_transcript_cell(
&mut transcript,
&mut has_emitted_lines,
cell_index,
cell,
width,
&base_opts,
);
}
transcript
}
/// Append a single history cell to an existing wrapped transcript.
///
/// This is the incremental building block used by transcript caching: it applies the same
/// flattening and viewport-wrapping rules as [`build_wrapped_transcript_lines`], but for one cell
/// at a time.
///
/// `has_emitted_lines` tracks whether the output already contains any non-spacer lines and is used
/// to decide when to insert an inter-cell spacer row.
pub(crate) fn append_wrapped_transcript_cell(
out: &mut TranscriptLines,
has_emitted_lines: &mut bool,
cell_index: usize,
cell: &Arc<dyn HistoryCell>,
width: u16,
base_opts: &crate::wrapping::RtOptions<'_>,
) {
use crate::render::line_utils::line_to_static;
use ratatui::style::Color;
if width == 0 {
return;
}
// Start from each cell's transcript view (prefixes/indents already applied), then apply
// viewport wrapping to prose while keeping preformatted content intact.
let rendered = cell.transcript_lines_with_joiners(width);
if rendered.lines.is_empty() {
return;
}
if !cell.is_stream_continuation() {
if *has_emitted_lines {
out.lines.push(Line::from(""));
out.meta.push(TranscriptLineMeta::Spacer);
out.joiner_before.push(None);
} else {
*has_emitted_lines = true;
}
}
// `visual_line_in_cell` counts the output visual lines produced from this cell *after* any
// viewport wrapping. This is distinct from `base_idx` (the index into the cell's input
// lines), since a single input line may wrap into multiple visual lines.
let mut visual_line_in_cell: usize = 0;
let mut first = true;
for (base_idx, base_line) in rendered.lines.iter().enumerate() {
// Preserve code blocks (and other preformatted text) by not applying
// viewport wrapping, so indentation remains meaningful for copy/paste.
if base_line.style.fg == Some(Color::Cyan) {
out.lines.push(base_line.clone());
out.meta.push(TranscriptLineMeta::CellLine {
cell_index,
line_in_cell: visual_line_in_cell,
});
visual_line_in_cell = visual_line_in_cell.saturating_add(1);
// Preformatted lines are treated as hard breaks; we keep the cell-provided joiner
// (which is typically `None`).
out.joiner_before.push(
rendered
.joiner_before
.get(base_idx)
.cloned()
.unwrap_or(None),
);
first = false;
continue;
}
if !cell.is_stream_continuation() {
if has_emitted_lines {
lines.push(Line::from(""));
meta.push(TranscriptLineMeta::Spacer);
joiner_before.push(None);
} else {
has_emitted_lines = true;
}
}
let opts = if first {
base_opts.clone()
} else {
// For subsequent input lines within a cell, treat the "initial" indent as the cell's
// subsequent indent (matches textarea wrapping expectations).
base_opts
.clone()
.initial_indent(base_opts.subsequent_indent.clone())
};
// `word_wrap_line_with_joiners` returns both the wrapped visual lines and, for each
// continuation segment, the exact joiner substring that should be inserted instead of a
// newline when copying as a logical line.
let (wrapped, wrapped_joiners) =
crate::wrapping::word_wrap_line_with_joiners(base_line, opts);
// `visual_line_in_cell` counts the output visual lines produced from this cell *after* any
// viewport wrapping. This is distinct from `base_idx` (the index into the cell's input
// lines), since a single input line may wrap into multiple visual lines.
let mut visual_line_in_cell: usize = 0;
let mut first = true;
for (base_idx, base_line) in rendered.lines.iter().enumerate() {
// Preserve code blocks (and other preformatted text) by not applying
// viewport wrapping, so indentation remains meaningful for copy/paste.
if base_line.style.fg == Some(Color::Cyan) {
lines.push(base_line.clone());
meta.push(TranscriptLineMeta::CellLine {
cell_index,
line_in_cell: visual_line_in_cell,
});
visual_line_in_cell = visual_line_in_cell.saturating_add(1);
// Preformatted lines are treated as hard breaks; we keep the cell-provided joiner
// (which is typically `None`).
joiner_before.push(
for (seg_idx, (wrapped_line, seg_joiner)) in
wrapped.into_iter().zip(wrapped_joiners).enumerate()
{
out.lines.push(line_to_static(&wrapped_line));
out.meta.push(TranscriptLineMeta::CellLine {
cell_index,
line_in_cell: visual_line_in_cell,
});
visual_line_in_cell = visual_line_in_cell.saturating_add(1);
if seg_idx == 0 {
// The first wrapped segment corresponds to the original input line, so we use the
// cell-provided joiner (hard break vs soft break *between input lines*).
out.joiner_before.push(
rendered
.joiner_before
.get(base_idx)
.cloned()
.unwrap_or(None),
);
first = false;
continue;
}
let opts = if first {
base_opts.clone()
} else {
// For subsequent input lines within a cell, treat the "initial" indent as the
// cell's subsequent indent (matches textarea wrapping expectations).
base_opts
.clone()
.initial_indent(base_opts.subsequent_indent.clone())
};
// `word_wrap_line_with_joiners` returns both the wrapped visual lines and, for each
// continuation segment, the exact joiner substring that should be inserted instead of a
// newline when copying as a logical line.
let (wrapped, wrapped_joiners) =
crate::wrapping::word_wrap_line_with_joiners(base_line, opts);
for (seg_idx, (wrapped_line, seg_joiner)) in
wrapped.into_iter().zip(wrapped_joiners).enumerate()
{
lines.push(line_to_static(&wrapped_line));
meta.push(TranscriptLineMeta::CellLine {
cell_index,
line_in_cell: visual_line_in_cell,
});
visual_line_in_cell = visual_line_in_cell.saturating_add(1);
if seg_idx == 0 {
// The first wrapped segment corresponds to the original input line, so we use
// the cell-provided joiner (hard break vs soft break *between input lines*).
joiner_before.push(
rendered
.joiner_before
.get(base_idx)
.cloned()
.unwrap_or(None),
);
} else {
// Subsequent wrapped segments are soft-wrap continuations produced by viewport
// wrapping, so we use the wrap-derived joiner.
joiner_before.push(seg_joiner);
}
// Subsequent wrapped segments are soft-wrap continuations produced by viewport
// wrapping, so we use the wrap-derived joiner.
out.joiner_before.push(seg_joiner);
}
first = false;
}
}
TranscriptLines {
lines,
meta,
joiner_before,
first = false;
}
}
@ -396,4 +423,56 @@ mod tests {
]
);
}
#[test]
fn append_wrapped_transcript_cell_matches_full_build() {
    use ratatui::style::Color;
    use ratatui::style::Style;

    // Feeding cells one at a time through `append_wrapped_transcript_cell` must produce
    // exactly what `build_wrapped_transcript_lines` produces for the same cells and width.
    let width: u16 = 7;

    // Ordinary prose cell.
    let prose_cell: Arc<dyn HistoryCell> = Arc::new(FakeCell {
        lines: vec![Line::from("• hello world")],
        joiner_before: vec![None],
        is_stream_continuation: false,
    });
    // A preformatted line should not be viewport-wrapped.
    let preformatted_cell: Arc<dyn HistoryCell> = Arc::new(FakeCell {
        lines: vec![Line::from("• 1234567890").style(Style::default().fg(Color::Cyan))],
        joiner_before: vec![None],
        is_stream_continuation: false,
    });
    // A stream continuation should not get an inter-cell spacer row.
    let continuation_cell: Arc<dyn HistoryCell> = Arc::new(FakeCell {
        lines: vec![Line::from("• wrap me please")],
        joiner_before: vec![None],
        is_stream_continuation: true,
    });
    let cells: Vec<Arc<dyn HistoryCell>> = vec![prose_cell, preformatted_cell, continuation_cell];

    // Reference result: the one-shot build over all cells.
    let expected = build_wrapped_transcript_lines(&cells, width);

    // Incremental result: start empty and append each cell in order.
    let wrap_opts: crate::wrapping::RtOptions<'_> =
        crate::wrapping::RtOptions::new(width.max(1) as usize);
    let mut incremental = TranscriptLines {
        lines: Vec::new(),
        meta: Vec::new(),
        joiner_before: Vec::new(),
    };
    let mut emitted_any = false;
    for (index, history_cell) in cells.iter().enumerate() {
        append_wrapped_transcript_cell(
            &mut incremental,
            &mut emitted_any,
            index,
            history_cell,
            width,
            &wrap_opts,
        );
    }

    assert_eq!(incremental.lines, expected.lines);
    assert_eq!(incremental.meta, expected.meta);
    assert_eq!(incremental.joiner_before, expected.joiner_before);
}
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,97 @@
# Performance testing (`codex-tui2`)
This doc captures a repeatable workflow for investigating `codex-tui2` performance issues
(especially high idle CPU and high CPU while streaming) and validating optimizations to the draw
hot path.
## Scope (this round)
The current focus is the transcript draw hot path, specifically the cost of repeatedly rendering
the same visible transcript lines via Ratatui's `Line::render_ref` (notably grapheme segmentation
and span layout).
The intended mitigation is a **rasterization cache**: render a wrapped transcript `Line` into a
row of `Cell`s once, cache it, and on subsequent redraws copy cached cells into the frame buffer.
Key invariants:
- The cache is width-scoped (invalidate on terminal width changes).
- The cache stores **base content** only; selection highlight and copy affordances are applied
after rendering, so they don't pollute cached rows.
## Roles
- Human: runs `codex-tui2` in an interactive terminal (e.g. Ghostty), triggers “idle” and
“streaming” scenarios, and captures profiles.
- Assistant (or a script): reads profile output and extracts hotspots and deltas.
## Baseline setup
Build from a clean checkout:
```sh
cd codex-rs
cargo build -p codex-tui2
```
Run `codex-tui2` in a terminal and get a PID (macOS):
```sh
pgrep -n codex-tui2
```
Track CPU quickly while reproducing:
```sh
top -pid "$(pgrep -n codex-tui2)"
```
## Capture profiles (macOS)
Capture both an “idle” and a “streaming” profile so hotspots are not conflated:
```sh
sample "$(pgrep -n codex-tui2)" 1 -file /tmp/tui2.idle.sample.txt
sample "$(pgrep -n codex-tui2)" 1 -file /tmp/tui2.streaming.sample.txt
```
For the streaming sample, trigger a response that emits many deltas (e.g. “Tell me a story”) so
the stream runs long enough to sample.
## Quick hotspot extraction
These `rg` patterns keep the investigation grounded in the data:
```sh
# Buffer diff hot path (idle)
rg -n "custom_terminal::diff_buffers|diff_buffers" /tmp/tui2.*.sample.txt | head -n 80
# Transcript rendering hot path (streaming)
rg -n "App::render_transcript_cells|Line::render|render_spans|styled_graphemes|GraphemeCursor::next_boundary" /tmp/tui2.*.sample.txt | head -n 120
```
## Rasterization-cache validation checklist
After implementing a transcript rasterization cache, re-run the same scenarios and confirm:
- Streaming sample shifts away from `unicode_segmentation::grapheme::GraphemeCursor::next_boundary`
stacks dominating the main thread.
- CPU during streaming drops materially vs baseline for the same streaming load.
- Idle CPU does not regress (redraw gating changes can mask rendering improvements; always measure
both idle and streaming).
## Notes to record per run
- Terminal size: width × height
- Scenario: idle vs streaming (prompt + approximate response length)
- CPU snapshot: `top` (directional)
- Profile excerpt: 20–50 relevant lines for the dominant stacks
## Code pointers
- `codex-rs/tui2/src/transcript_view_cache.rs`: wrapped transcript memoization + per-line
rasterization cache (cached `Cell` rows).
- `codex-rs/tui2/src/transcript_render.rs`: incremental helper used by the wrapped-line cache
(`append_wrapped_transcript_cell`).
- `codex-rs/tui2/src/app.rs`: wiring in `App::render_transcript_cells` (uses cached rows instead of
calling `Line::render_ref` every frame).