file-search: improve file query perf (#9939)
switch nucleo-matcher for nucleo and use a "file search session" w/ live updating query instead of a single hermetic run per query.
This commit is contained in:
parent
35e03a0716
commit
b8156706e6
10 changed files with 865 additions and 422 deletions
37
codex-rs/Cargo.lock
generated
37
codex-rs/Cargo.lock
generated
|
|
@ -1572,11 +1572,13 @@ version = "0.0.0"
|
|||
dependencies = [
|
||||
"anyhow",
|
||||
"clap",
|
||||
"crossbeam-channel",
|
||||
"ignore",
|
||||
"nucleo-matcher",
|
||||
"nucleo",
|
||||
"pretty_assertions",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tempfile",
|
||||
"tokio",
|
||||
]
|
||||
|
||||
|
|
@ -4899,11 +4901,20 @@ dependencies = [
|
|||
"windows-sys 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nucleo"
|
||||
version = "0.5.0"
|
||||
source = "git+https://github.com/helix-editor/nucleo.git?rev=4253de9faabb4e5c6d81d946a5e35a90f87347ee#4253de9faabb4e5c6d81d946a5e35a90f87347ee"
|
||||
dependencies = [
|
||||
"nucleo-matcher",
|
||||
"parking_lot",
|
||||
"rayon",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nucleo-matcher"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bf33f538733d1a5a3494b836ba913207f14d9d4a1d3cd67030c5061bdd2cac85"
|
||||
source = "git+https://github.com/helix-editor/nucleo.git?rev=4253de9faabb4e5c6d81d946a5e35a90f87347ee#4253de9faabb4e5c6d81d946a5e35a90f87347ee"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"unicode-segmentation",
|
||||
|
|
@ -6331,6 +6342,26 @@ dependencies = [
|
|||
"ratatui",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon"
|
||||
version = "1.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f"
|
||||
dependencies = [
|
||||
"either",
|
||||
"rayon-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rayon-core"
|
||||
version = "1.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
|
||||
dependencies = [
|
||||
"crossbeam-deque",
|
||||
"crossbeam-utils",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.5.15"
|
||||
|
|
|
|||
|
|
@ -128,6 +128,7 @@ clap = "4"
|
|||
clap_complete = "4"
|
||||
color-eyre = "0.6.3"
|
||||
crossterm = "0.28.1"
|
||||
crossbeam-channel = "0.5.15"
|
||||
ctor = "0.6.3"
|
||||
derive_more = "2"
|
||||
diffy = "0.4.2"
|
||||
|
|
@ -161,7 +162,7 @@ maplit = "1.0.2"
|
|||
mime_guess = "2.0.5"
|
||||
multimap = "0.10.0"
|
||||
notify = "8.2.0"
|
||||
nucleo-matcher = "0.3.1"
|
||||
nucleo = { git = "https://github.com/helix-editor/nucleo.git", rev = "4253de9faabb4e5c6d81d946a5e35a90f87347ee" }
|
||||
once_cell = "1.20.2"
|
||||
openssl-sys = "*"
|
||||
opentelemetry = "0.31.0"
|
||||
|
|
|
|||
|
|
@ -48,8 +48,7 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
|
|||
.await??;
|
||||
|
||||
let value = resp.result;
|
||||
// The path separator on Windows affects the score.
|
||||
let expected_score = if cfg!(windows) { 69 } else { 72 };
|
||||
let expected_score = 72;
|
||||
|
||||
assert_eq!(
|
||||
value,
|
||||
|
|
@ -59,16 +58,9 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
|
|||
"root": root_path.clone(),
|
||||
"path": "abexy",
|
||||
"file_name": "abexy",
|
||||
"score": 88,
|
||||
"score": 84,
|
||||
"indices": [0, 1, 2],
|
||||
},
|
||||
{
|
||||
"root": root_path.clone(),
|
||||
"path": "abcde",
|
||||
"file_name": "abcde",
|
||||
"score": 74,
|
||||
"indices": [0, 1, 4],
|
||||
},
|
||||
{
|
||||
"root": root_path.clone(),
|
||||
"path": sub_abce_rel,
|
||||
|
|
@ -76,6 +68,13 @@ async fn test_fuzzy_file_search_sorts_and_includes_indices() -> Result<()> {
|
|||
"score": expected_score,
|
||||
"indices": [4, 5, 7],
|
||||
},
|
||||
{
|
||||
"root": root_path.clone(),
|
||||
"path": "abcde",
|
||||
"file_name": "abcde",
|
||||
"score": 71,
|
||||
"indices": [0, 1, 4],
|
||||
},
|
||||
]
|
||||
})
|
||||
);
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
use async_trait::async_trait;
|
||||
use std::cmp::Reverse;
|
||||
use std::ffi::OsStr;
|
||||
use std::io::{self};
|
||||
|
|
@ -7,8 +8,6 @@ use std::path::Path;
|
|||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
|
||||
use async_trait::async_trait;
|
||||
use time::OffsetDateTime;
|
||||
use time::PrimitiveDateTime;
|
||||
use time::format_description::FormatItem;
|
||||
|
|
|
|||
|
|
@ -15,11 +15,13 @@ path = "src/lib.rs"
|
|||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
clap = { workspace = true, features = ["derive"] }
|
||||
crossbeam-channel = { workspace = true }
|
||||
ignore = { workspace = true }
|
||||
nucleo-matcher = { workspace = true }
|
||||
nucleo = { workspace = true }
|
||||
serde = { workspace = true, features = ["derive"] }
|
||||
serde_json = { workspace = true }
|
||||
tokio = { workspace = true, features = ["full"] }
|
||||
|
||||
[dev-dependencies]
|
||||
pretty_assertions = { workspace = true }
|
||||
tempfile = { workspace = true }
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1507,9 +1507,7 @@ impl App {
|
|||
tui.frame_requester().schedule_frame();
|
||||
}
|
||||
AppEvent::StartFileSearch(query) => {
|
||||
if !query.is_empty() {
|
||||
self.file_search.on_user_query(query);
|
||||
}
|
||||
self.file_search.on_user_query(query);
|
||||
}
|
||||
AppEvent::FileSearchResult { query, matches } => {
|
||||
self.chat_widget.apply_file_search_result(query, matches);
|
||||
|
|
|
|||
|
|
@ -2380,6 +2380,11 @@ impl ChatComposer {
|
|||
// When browsing input history (shell-style Up/Down recall), skip all popup
|
||||
// synchronization so nothing steals focus from continued history navigation.
|
||||
if browsing_history {
|
||||
if self.current_file_query.is_some() {
|
||||
self.app_event_tx
|
||||
.send(AppEvent::StartFileSearch(String::new()));
|
||||
self.current_file_query = None;
|
||||
}
|
||||
self.active_popup = ActivePopup::None;
|
||||
return;
|
||||
}
|
||||
|
|
@ -2390,12 +2395,22 @@ impl ChatComposer {
|
|||
self.sync_command_popup(allow_command_popup);
|
||||
|
||||
if matches!(self.active_popup, ActivePopup::Command(_)) {
|
||||
if self.current_file_query.is_some() {
|
||||
self.app_event_tx
|
||||
.send(AppEvent::StartFileSearch(String::new()));
|
||||
self.current_file_query = None;
|
||||
}
|
||||
self.dismissed_file_popup_token = None;
|
||||
self.dismissed_skill_popup_token = None;
|
||||
return;
|
||||
}
|
||||
|
||||
if let Some(token) = skill_token {
|
||||
if self.current_file_query.is_some() {
|
||||
self.app_event_tx
|
||||
.send(AppEvent::StartFileSearch(String::new()));
|
||||
self.current_file_query = None;
|
||||
}
|
||||
self.sync_skill_popup(token);
|
||||
return;
|
||||
}
|
||||
|
|
@ -2406,6 +2421,11 @@ impl ChatComposer {
|
|||
return;
|
||||
}
|
||||
|
||||
if self.current_file_query.is_some() {
|
||||
self.app_event_tx
|
||||
.send(AppEvent::StartFileSearch(String::new()));
|
||||
self.current_file_query = None;
|
||||
}
|
||||
self.dismissed_file_popup_token = None;
|
||||
if matches!(
|
||||
self.active_popup,
|
||||
|
|
@ -2539,7 +2559,10 @@ impl ChatComposer {
|
|||
return;
|
||||
}
|
||||
|
||||
if !query.is_empty() {
|
||||
if query.is_empty() {
|
||||
self.app_event_tx
|
||||
.send(AppEvent::StartFileSearch(String::new()));
|
||||
} else {
|
||||
self.app_event_tx
|
||||
.send(AppEvent::StartFileSearch(query.clone()));
|
||||
}
|
||||
|
|
@ -2563,7 +2586,11 @@ impl ChatComposer {
|
|||
}
|
||||
}
|
||||
|
||||
self.current_file_query = Some(query);
|
||||
if query.is_empty() {
|
||||
self.current_file_query = None;
|
||||
} else {
|
||||
self.current_file_query = Some(query);
|
||||
}
|
||||
self.dismissed_file_popup_token = None;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -43,18 +43,10 @@ impl FileSearchPopup {
|
|||
return;
|
||||
}
|
||||
|
||||
// Determine if current matches are still relevant.
|
||||
let keep_existing = query.starts_with(&self.display_query);
|
||||
|
||||
self.pending_query.clear();
|
||||
self.pending_query.push_str(query);
|
||||
|
||||
self.waiting = true; // waiting for new results
|
||||
|
||||
if !keep_existing {
|
||||
self.matches.clear();
|
||||
self.state.reset();
|
||||
}
|
||||
}
|
||||
|
||||
/// Put the popup into an "idle" state used for an empty query (just "@").
|
||||
|
|
|
|||
|
|
@ -1,32 +1,15 @@
|
|||
//! Helper that owns the debounce/cancellation logic for `@` file searches.
|
||||
//! Session-based orchestration for `@` file searches.
|
||||
//!
|
||||
//! `ChatComposer` publishes *every* change of the `@token` as
|
||||
//! `AppEvent::StartFileSearch(query)`.
|
||||
//! This struct receives those events and decides when to actually spawn the
|
||||
//! expensive search (handled in the main `App` thread). It tries to ensure:
|
||||
//!
|
||||
//! - Even when the user types long text quickly, they will start seeing results
|
||||
//! after a short delay using an early version of what they typed.
|
||||
//! - At most one search is in-flight at any time.
|
||||
//!
|
||||
//! It works as follows:
|
||||
//!
|
||||
//! 1. First query starts a debounce timer.
|
||||
//! 2. While the timer is pending, the latest query from the user is stored.
|
||||
//! 3. When the timer fires, it is cleared, and a search is done for the most
|
||||
//! recent query.
|
||||
//! 4. If there is a in-flight search that is not a prefix of the latest thing
|
||||
//! the user typed, it is cancelled.
|
||||
//! `ChatComposer` publishes every change of the `@token` as
|
||||
//! `AppEvent::StartFileSearch(query)`. This manager owns a single
|
||||
//! `codex-file-search` session for the current search root, updates the query
|
||||
//! on every keystroke, and drops the session when the query becomes empty.
|
||||
|
||||
use codex_file_search as file_search;
|
||||
use std::num::NonZeroUsize;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::Mutex;
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::app_event::AppEvent;
|
||||
use crate::app_event_sender::AppEventSender;
|
||||
|
|
@ -34,35 +17,16 @@ use crate::app_event_sender::AppEventSender;
|
|||
const MAX_FILE_SEARCH_RESULTS: NonZeroUsize = NonZeroUsize::new(20).unwrap();
|
||||
const NUM_FILE_SEARCH_THREADS: NonZeroUsize = NonZeroUsize::new(2).unwrap();
|
||||
|
||||
/// How long to wait after a keystroke before firing the first search when none
|
||||
/// is currently running. Keeps early queries more meaningful.
|
||||
const FILE_SEARCH_DEBOUNCE: Duration = Duration::from_millis(100);
|
||||
|
||||
const ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL: Duration = Duration::from_millis(20);
|
||||
|
||||
/// State machine for file-search orchestration.
|
||||
pub(crate) struct FileSearchManager {
|
||||
/// Unified state guarded by one mutex.
|
||||
state: Arc<Mutex<SearchState>>,
|
||||
|
||||
search_dir: PathBuf,
|
||||
app_tx: AppEventSender,
|
||||
}
|
||||
|
||||
struct SearchState {
|
||||
/// Latest query typed by user (updated every keystroke).
|
||||
latest_query: String,
|
||||
|
||||
/// true if a search is currently scheduled.
|
||||
is_search_scheduled: bool,
|
||||
|
||||
/// If there is an active search, this will be the query being searched.
|
||||
active_search: Option<ActiveSearch>,
|
||||
}
|
||||
|
||||
struct ActiveSearch {
|
||||
query: String,
|
||||
cancellation_token: Arc<AtomicBool>,
|
||||
session: Option<file_search::FileSearchSession>,
|
||||
session_token: usize,
|
||||
}
|
||||
|
||||
impl FileSearchManager {
|
||||
|
|
@ -70,8 +34,8 @@ impl FileSearchManager {
|
|||
Self {
|
||||
state: Arc::new(Mutex::new(SearchState {
|
||||
latest_query: String::new(),
|
||||
is_search_scheduled: false,
|
||||
active_search: None,
|
||||
session: None,
|
||||
session_token: 0,
|
||||
})),
|
||||
search_dir,
|
||||
app_tx: tx,
|
||||
|
|
@ -80,120 +44,85 @@ impl FileSearchManager {
|
|||
|
||||
/// Call whenever the user edits the `@` token.
|
||||
pub fn on_user_query(&self, query: String) {
|
||||
{
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let mut st = self.state.lock().unwrap();
|
||||
if query == st.latest_query {
|
||||
// No change, nothing to do.
|
||||
return;
|
||||
}
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let mut st = self.state.lock().unwrap();
|
||||
if query == st.latest_query {
|
||||
return;
|
||||
}
|
||||
st.latest_query.clear();
|
||||
st.latest_query.push_str(&query);
|
||||
|
||||
// Update latest query.
|
||||
st.latest_query.clear();
|
||||
st.latest_query.push_str(&query);
|
||||
|
||||
// If there is an in-flight search that is definitely obsolete,
|
||||
// cancel it now.
|
||||
if let Some(active_search) = &st.active_search
|
||||
&& !query.starts_with(&active_search.query)
|
||||
{
|
||||
active_search
|
||||
.cancellation_token
|
||||
.store(true, Ordering::Relaxed);
|
||||
st.active_search = None;
|
||||
}
|
||||
|
||||
// Schedule a search to run after debounce.
|
||||
if !st.is_search_scheduled {
|
||||
st.is_search_scheduled = true;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
if query.is_empty() {
|
||||
st.session.take();
|
||||
return;
|
||||
}
|
||||
|
||||
// If we are here, we set `st.is_search_scheduled = true` before
|
||||
// dropping the lock. This means we are the only thread that can spawn a
|
||||
// debounce timer.
|
||||
let state = self.state.clone();
|
||||
let search_dir = self.search_dir.clone();
|
||||
let tx_clone = self.app_tx.clone();
|
||||
thread::spawn(move || {
|
||||
// Always do a minimum debounce, but then poll until the
|
||||
// `active_search` is cleared.
|
||||
thread::sleep(FILE_SEARCH_DEBOUNCE);
|
||||
loop {
|
||||
#[expect(clippy::unwrap_used)]
|
||||
if state.lock().unwrap().active_search.is_none() {
|
||||
break;
|
||||
}
|
||||
thread::sleep(ACTIVE_SEARCH_COMPLETE_POLL_INTERVAL);
|
||||
}
|
||||
|
||||
// The debounce timer has expired, so start a search using the
|
||||
// latest query.
|
||||
let cancellation_token = Arc::new(AtomicBool::new(false));
|
||||
let token = cancellation_token.clone();
|
||||
let query = {
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let mut st = state.lock().unwrap();
|
||||
let query = st.latest_query.clone();
|
||||
st.is_search_scheduled = false;
|
||||
st.active_search = Some(ActiveSearch {
|
||||
query: query.clone(),
|
||||
cancellation_token: token,
|
||||
});
|
||||
query
|
||||
};
|
||||
|
||||
FileSearchManager::spawn_file_search(
|
||||
query,
|
||||
search_dir,
|
||||
tx_clone,
|
||||
cancellation_token,
|
||||
state,
|
||||
);
|
||||
});
|
||||
if st.session.is_none() {
|
||||
self.start_session_locked(&mut st);
|
||||
}
|
||||
if let Some(session) = st.session.as_ref() {
|
||||
session.update_query(&query);
|
||||
}
|
||||
}
|
||||
|
||||
fn spawn_file_search(
|
||||
query: String,
|
||||
search_dir: PathBuf,
|
||||
tx: AppEventSender,
|
||||
cancellation_token: Arc<AtomicBool>,
|
||||
search_state: Arc<Mutex<SearchState>>,
|
||||
) {
|
||||
let compute_indices = true;
|
||||
std::thread::spawn(move || {
|
||||
let matches = file_search::run(
|
||||
&query,
|
||||
MAX_FILE_SEARCH_RESULTS,
|
||||
&search_dir,
|
||||
Vec::new(),
|
||||
NUM_FILE_SEARCH_THREADS,
|
||||
cancellation_token.clone(),
|
||||
compute_indices,
|
||||
true,
|
||||
)
|
||||
.map(|res| res.matches)
|
||||
.unwrap_or_default();
|
||||
|
||||
let is_cancelled = cancellation_token.load(Ordering::Relaxed);
|
||||
if !is_cancelled {
|
||||
tx.send(AppEvent::FileSearchResult { query, matches });
|
||||
fn start_session_locked(&self, st: &mut SearchState) {
|
||||
st.session_token = st.session_token.wrapping_add(1);
|
||||
let session_token = st.session_token;
|
||||
let reporter = Arc::new(TuiSessionReporter {
|
||||
state: self.state.clone(),
|
||||
app_tx: self.app_tx.clone(),
|
||||
session_token,
|
||||
});
|
||||
let session = file_search::create_session(
|
||||
&self.search_dir,
|
||||
file_search::SessionOptions {
|
||||
limit: MAX_FILE_SEARCH_RESULTS,
|
||||
exclude: Vec::new(),
|
||||
threads: NUM_FILE_SEARCH_THREADS,
|
||||
compute_indices: true,
|
||||
respect_gitignore: true,
|
||||
},
|
||||
reporter,
|
||||
);
|
||||
match session {
|
||||
Ok(session) => st.session = Some(session),
|
||||
Err(err) => {
|
||||
tracing::warn!("file search session failed to start: {err}");
|
||||
st.session = None;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the active search state. Do a pointer comparison to verify
|
||||
// that we are clearing the ActiveSearch that corresponds to the
|
||||
// cancellation token we were given.
|
||||
{
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let mut st = search_state.lock().unwrap();
|
||||
if let Some(active_search) = &st.active_search
|
||||
&& Arc::ptr_eq(&active_search.cancellation_token, &cancellation_token)
|
||||
{
|
||||
st.active_search = None;
|
||||
}
|
||||
}
|
||||
struct TuiSessionReporter {
|
||||
state: Arc<Mutex<SearchState>>,
|
||||
app_tx: AppEventSender,
|
||||
session_token: usize,
|
||||
}
|
||||
|
||||
impl TuiSessionReporter {
|
||||
fn send_snapshot(&self, snapshot: &file_search::FileSearchSnapshot) {
|
||||
#[expect(clippy::unwrap_used)]
|
||||
let st = self.state.lock().unwrap();
|
||||
if st.session_token != self.session_token
|
||||
|| st.latest_query.is_empty()
|
||||
|| snapshot.query.is_empty()
|
||||
{
|
||||
return;
|
||||
}
|
||||
let query = snapshot.query.clone();
|
||||
drop(st);
|
||||
self.app_tx.send(AppEvent::FileSearchResult {
|
||||
query,
|
||||
matches: snapshot.matches.clone(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
impl file_search::SessionReporter for TuiSessionReporter {
|
||||
fn on_update(&self, snapshot: &file_search::FileSearchSnapshot) {
|
||||
self.send_snapshot(snapshot);
|
||||
}
|
||||
|
||||
fn on_complete(&self) {}
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue