file-search: improve file query perf (#9939)

switch nucleo-matcher for nucleo and use a "file search session" w/ live
updating query instead of a single hermetic run per query.
This commit is contained in:
Jeremy Rose 2026-01-28 10:54:43 -08:00 committed by GitHub
parent 35e03a0716
commit b8156706e6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 865 additions and 422 deletions

37
codex-rs/Cargo.lock generated
View file

@ -1572,11 +1572,13 @@ version = "0.0.0"
dependencies = [
"anyhow",
"clap",
"crossbeam-channel",
"ignore",
"nucleo-matcher",
"nucleo",
"pretty_assertions",
"serde",
"serde_json",
"tempfile",
"tokio",
]
@ -4899,11 +4901,20 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "nucleo"
version = "0.5.0"
source = "git+https://github.com/helix-editor/nucleo.git?rev=4253de9faabb4e5c6d81d946a5e35a90f87347ee#4253de9faabb4e5c6d81d946a5e35a90f87347ee"
dependencies = [
"nucleo-matcher",
"parking_lot",
"rayon",
]
[[package]]
name = "nucleo-matcher"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
source = "git+https://github.com/helix-editor/nucleo.git?rev=4253de9faabb4e5c6d81d946a5e35a90f87347ee#4253de9faabb4e5c6d81d946a5e35a90f87347ee"
dependencies = [
"memchr",
"unicode-segmentation",
@ -6331,6 +6342,26 @@ dependencies = [
"ratatui",
]
[[package]]
name = "rayon"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]]
name = "redox_syscall"
version = "0.5.15"

View file

@ -128,6 +128,7 @@ clap = "4"
clap_complete = "4"
color-eyre = "0.6.3"
crossterm = "0.28.1"
crossbeam-channel = "0.5.15"
ctor = "0.6.3"
derive_more = "2"
diffy = "0.4.2"
@ -161,7 +162,7 @@ maplit = "1.0.2"
mime_guess = "2.0.5"
multimap = "0.10.0"
notify = "8.2.0"
nucleo-matcher = "0.3.1"
nucleo = { git = "https://github.com/helix-editor/nucleo.git", rev = "4253de9faabb4e5c6d81d946a5e35a90f87347ee" }
once_cell = "1.20.2"
openssl-sys = "*"
opentelemetry = "0.31.0"

View file

@ -48,8 +48,7 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
.await??;
let value = resp.result;
// The path separator on Windows affects the score.
let expected_score = if cfg!(windows) { 69 } else { 72 };
let expected_score = 72;
assert_eq!(
value,
@ -59,16 +58,9 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
"root": root_path.clone(),
"path": "abexy",
"file_name": "abexy",
"score": 88,
"score": 84,
"indices": [0, 1, 2],
},
{
"root": root_path.clone(),
"path": "abcde",
"file_name": "abcde",
"score": 74,
"indices": [0, 1, 4],
},
{
"root": root_path.clone(),
"path": sub_abce_rel,
@ -76,6 +68,13 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
"score": expected_score,
"indices": [4, 5, 7],
},
{
"root": root_path.clone(),
"path": "abcde",
"file_name": "abcde",
"score": 71,
"indices": [0, 1, 4],
},
]
})
);

View file

@ -1,3 +1,4 @@
use async_trait::async_trait;
use std::cmp::Reverse;
use std::ffi::OsStr;
use std::io::{self};
@ -7,8 +8,6 @@ use std::path::Path;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use async_trait::async_trait;
use time::OffsetDateTime;
use time::PrimitiveDateTime;
use time::format_description::FormatItem;

View file

@ -15,11 +15,13 @@ path = "src/lib.rs"
[dependencies]
anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] }
crossbeam-channel = { workspace = true }
ignore = { workspace = true }
nucleo-matcher = { workspace = true }
nucleo = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
tokio = { workspace = true, features = ["full"] }
[dev-dependencies]
pretty_assertions = { workspace = true }
tempfile = { workspace = true }

File diff suppressed because it is too large Load diff

View file

@ -1507,9 +1507,7 @@ impl App {
tui.frame_requester().schedule_frame();
}
AppEvent::StartFileSearch(query) => {
if !query.is_empty() {
self.file_search.on_user_query(query);
}
self.file_search.on_user_query(query);
}
AppEvent::FileSearchResult { query, matches } => {
self.chat_widget.apply_file_search_result(query, matches);

View file

@ -2380,6 +2380,11 @@ impl ChatComposer {
// When browsing input history (shell-style Up/Down recall), skip all popup
// synchronization so nothing steals focus from continued history navigation.
if browsing_history {
if self.current_file_query.is_some() {
self.app_event_tx
.send(AppEvent::StartFileSearch(String::new()));
self.current_file_query = None;
}
self.active_popup = ActivePopup::None;
return;
}
@ -2390,12 +2395,22 @@ impl ChatComposer {
self.sync_command_popup(allow_command_popup);
if matches!(self.active_popup, ActivePopup::Command(_)) {
if self.current_file_query.is_some() {
self.app_event_tx
.send(AppEvent::StartFileSearch(String::new()));
self.current_file_query = None;
}
self.dismissed_file_popup_token = None;
self.dismissed_skill_popup_token = None;
return;
}
if let Some(token) = skill_token {
if self.current_file_query.is_some() {
self.app_event_tx
.send(AppEvent::StartFileSearch(String::new()));
self.current_file_query = None;
}
self.sync_skill_popup(token);
return;
}
@ -2406,6 +2421,11 @@ impl ChatComposer {
return;
}
if self.current_file_query.is_some() {
self.app_event_tx
.send(AppEvent::StartFileSearch(String::new()));
self.current_file_query = None;
}
self.dismissed_file_popup_token = None;
if matches!(
self.active_popup,
@ -2539,7 +2559,10 @@ impl ChatComposer {
return;
}
if !query.is_empty() {
if query.is_empty() {
self.app_event_tx
.send(AppEvent::StartFileSearch(String::new()));
} else {
self.app_event_tx
.send(AppEvent::StartFileSearch(query.clone()));
}
@ -2563,7 +2586,11 @@ impl ChatComposer {
}
}
self.current_file_query = Some(query);
if query.is_empty() {
self.current_file_query = None;
} else {
self.current_file_query = Some(query);
}
self.dismissed_file_popup_token = None;
}

View file

@ -43,18 +43,10 @@ impl FileSearchPopup {
return;
}
// Determine if current matches are still relevant.
let keep_existing = query.starts_with(&self.display_query);
self.pending_query.clear();
self.pending_query.push_str(query);
self.waiting = true; // waiting for new results
if !keep_existing {
self.matches.clear();
self.state.reset();
}
}
/// Put the popup into an "idle" state used for an empty query (just "@").

View file

@ -1,32 +1,15 @@
//! Helper that owns the debounce/cancellation logic for `@` file searches.
//! Session-based orchestration for `@` file searches.
//!
//! `ChatComposer` publishes *every* change of the `@token` as
//! `AppEvent::StartFileSearch(query)`.
//! This struct receives those events and decides when to actually spawn the
//! expensive search (handled in the main `App` thread). It tries to ensure:
//!
//! - Even when the user types long text quickly, they will start seeing results
//! after a short delay using an early version of what they typed.
//! - At most one search is in-flight at any time.
//!
//! It works as follows:
//!
//! 1. First query starts a debounce timer.
//! 2. While the timer is pending, the latest query from the user is stored.
//! 3. When the timer fires, it is cleared, and a search is done for the most
//! recent query.
//! 4. If there is an in-flight search that is not a prefix of the latest thing
//! the user typed, it is cancelled.
//! `ChatComposer` publishes every change of the `@token` as
//! `AppEvent::StartFileSearch(query)`. This manager owns a single
//! `codex-file-search` session for the current search root, updates the query
//! on every keystroke, and drops the session when the query becomes empty.
use codex_file_search as file_search;
use std::num::NonZeroUsize;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
use std::thread;
use std::time::Duration;
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
@ -34,35 +17,16 @@ use crate::app_event_sender::AppEventSender;
const MAX_FILE_SEARCH_RESULTS: NonZeroUsize = NonZeroUsize::new(20).unwrap();
const NUM_FILE_SEARCH_THREADS: NonZeroUsize = NonZeroUsize::new(2).unwrap();
/// How long to wait after a keystroke before firing the first search when none
/// is currently running. Keeps early queries more meaningful.
const FILE_SEARCH_DEBOUNCE: Duration = Duration::from_millis(100);
const ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL: Duration = Duration::from_millis(20);
/// State machine for file-search orchestration.
pub(crate) struct FileSearchManager {
/// Unified state guarded by one mutex.
state: Arc<Mutex<SearchState>>,
search_dir: PathBuf,
app_tx: AppEventSender,
}
struct SearchState {
/// Latest query typed by user (updated every keystroke).
latest_query: String,
/// true if a search is currently scheduled.
is_search_scheduled: bool,
/// If there is an active search, this will be the query being searched.
active_search: Option<ActiveSearch>,
}
struct ActiveSearch {
query: String,
cancellation_token: Arc<AtomicBool>,
session: Option<file_search::FileSearchSession>,
session_token: usize,
}
impl FileSearchManager {
@ -70,8 +34,8 @@ impl FileSearchManager {
Self {
state: Arc::new(Mutex::new(SearchState {
latest_query: String::new(),
is_search_scheduled: false,
active_search: None,
session: None,
session_token: 0,
})),
search_dir,
app_tx: tx,
@ -80,120 +44,85 @@ impl FileSearchManager {
/// Call whenever the user edits the `@` token.
pub fn on_user_query(&self, query: String) {
{
#[expect(clippy::unwrap_used)]
let mut st = self.state.lock().unwrap();
if query == st.latest_query {
// No change, nothing to do.
return;
}
#[expect(clippy::unwrap_used)]
let mut st = self.state.lock().unwrap();
if query == st.latest_query {
return;
}
st.latest_query.clear();
st.latest_query.push_str(&query);
// Update latest query.
st.latest_query.clear();
st.latest_query.push_str(&query);
// If there is an in-flight search that is definitely obsolete,
// cancel it now.
if let Some(active_search) = &st.active_search
&& !query.starts_with(&active_search.query)
{
active_search
.cancellation_token
.store(true, Ordering::Relaxed);
st.active_search = None;
}
// Schedule a search to run after debounce.
if !st.is_search_scheduled {
st.is_search_scheduled = true;
} else {
return;
}
if query.is_empty() {
st.session.take();
return;
}
// If we are here, we set `st.is_search_scheduled = true` before
// dropping the lock. This means we are the only thread that can spawn a
// debounce timer.
let state = self.state.clone();
let search_dir = self.search_dir.clone();
let tx_clone = self.app_tx.clone();
thread::spawn(move || {
// Always do a minimum debounce, but then poll until the
// `active_search` is cleared.
thread::sleep(FILE_SEARCH_DEBOUNCE);
loop {
#[expect(clippy::unwrap_used)]
if state.lock().unwrap().active_search.is_none() {
break;
}
thread::sleep(ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL);
}
// The debounce timer has expired, so start a search using the
// latest query.
let cancellation_token = Arc::new(AtomicBool::new(false));
let token = cancellation_token.clone();
let query = {
#[expect(clippy::unwrap_used)]
let mut st = state.lock().unwrap();
let query = st.latest_query.clone();
st.is_search_scheduled = false;
st.active_search = Some(ActiveSearch {
query: query.clone(),
cancellation_token: token,
});
query
};
FileSearchManager::spawn_file_search(
query,
search_dir,
tx_clone,
cancellation_token,
state,
);
});
if st.session.is_none() {
self.start_session_locked(&mut st);
}
if let Some(session) = st.session.as_ref() {
session.update_query(&query);
}
}
fn spawn_file_search(
query: String,
search_dir: PathBuf,
tx: AppEventSender,
cancellation_token: Arc<AtomicBool>,
search_state: Arc<Mutex<SearchState>>,
) {
let compute_indices = true;
std::thread::spawn(move || {
let matches = file_search::run(
&query,
MAX_FILE_SEARCH_RESULTS,
&search_dir,
Vec::new(),
NUM_FILE_SEARCH_THREADS,
cancellation_token.clone(),
compute_indices,
true,
)
.map(|res| res.matches)
.unwrap_or_default();
let is_cancelled = cancellation_token.load(Ordering::Relaxed);
if !is_cancelled {
tx.send(AppEvent::FileSearchResult { query, matches });
fn start_session_locked(&self, st: &mut SearchState) {
st.session_token = st.session_token.wrapping_add(1);
let session_token = st.session_token;
let reporter = Arc::new(TuiSessionReporter {
state: self.state.clone(),
app_tx: self.app_tx.clone(),
session_token,
});
let session = file_search::create_session(
&self.search_dir,
file_search::SessionOptions {
limit: MAX_FILE_SEARCH_RESULTS,
exclude: Vec::new(),
threads: NUM_FILE_SEARCH_THREADS,
compute_indices: true,
respect_gitignore: true,
},
reporter,
);
match session {
Ok(session) => st.session = Some(session),
Err(err) => {
tracing::warn!("file search session failed to start: {err}");
st.session = None;
}
}
}
}
// Reset the active search state. Do a pointer comparison to verify
// that we are clearing the ActiveSearch that corresponds to the
// cancellation token we were given.
{
#[expect(clippy::unwrap_used)]
let mut st = search_state.lock().unwrap();
if let Some(active_search) = &st.active_search
&& Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
{
st.active_search = None;
}
}
/// Callback sink handed to the `codex-file-search` session; forwards match
/// snapshots from the search threads back into the TUI event loop.
struct TuiSessionReporter {
// Shared manager state, used to detect stale sessions before forwarding.
state: Arc<Mutex<SearchState>>,
// Channel into the app event loop for delivering `FileSearchResult`s.
app_tx: AppEventSender,
// Token captured at session creation; compared against the live
// `SearchState::session_token` so a replaced session stops reporting.
session_token: usize,
}
impl TuiSessionReporter {
/// Forward a search snapshot to the app event loop, unless it is stale.
fn send_snapshot(&self, snapshot: &file_search::FileSearchSnapshot) {
#[expect(clippy::unwrap_used)]
let st = self.state.lock().unwrap();
// Drop the snapshot if this reporter belongs to a superseded session
// (token mismatch), or if the query has since been cleared — either on
// our side (`latest_query`) or in the snapshot itself.
if st.session_token != self.session_token
|| st.latest_query.is_empty()
|| snapshot.query.is_empty()
{
return;
}
let query = snapshot.query.clone();
// Release the state lock before sending so the event dispatch never
// runs while the mutex is held.
drop(st);
self.app_tx.send(AppEvent::FileSearchResult {
query,
matches: snapshot.matches.clone(),
});
}
}
/// Wire the file-search session's reporter callbacks onto the TUI.
impl file_search::SessionReporter for TuiSessionReporter {
fn on_update(&self, snapshot: &file_search::FileSearchSnapshot) {
self.send_snapshot(snapshot);
}
// Intentionally a no-op: results are streamed incrementally via
// `on_update`, so there is nothing extra to do at completion.
fn on_complete(&self) {}
}