From 02abd9a8eab953f6936453c69bb567660ce91045 Mon Sep 17 00:00:00 2001 From: Anton Panasenko Date: Sun, 15 Feb 2026 19:18:41 -0800 Subject: [PATCH] feat: persist and restore codex app's tools after search (#11780) ### What changed 1. Removed per-turn MCP selection reset in `core/src/tasks/mod.rs`. 2. Added `SessionState::set_mcp_tool_selection(Vec)` in `core/src/state/session.rs` for authoritative restore behavior (deduped, order-preserving, empty clears). 3. Added rollout parsing in `core/src/codex.rs` to recover `active_selected_tools` from prior `search_tool_bm25` outputs: - tracks matching `call_id`s - parses function output text JSON - extracts `active_selected_tools` - latest valid payload wins - malformed/non-matching payloads are ignored 4. Applied restore logic to resumed and forked startup paths in `core/src/codex.rs`. 5. Updated instruction text to session/thread scope in `core/templates/search_tool/tool_description.md`. 6. Expanded tests in `core/tests/suite/search_tool.rs`, plus unit coverage in: - `core/src/codex.rs` - `core/src/state/session.rs` ### Behavior after change 1. Search activates matched tools. 2. Additional searches union into active selection. 3. Selection survives new turns in the same thread. 4. Resume/fork restores selection from rollout history. 5. Separate threads do not inherit selection unless forked. --- codex-rs/core/src/codex.rs | 248 ++++- codex-rs/core/src/mcp_connection_manager.rs | 10 + codex-rs/core/src/state/session.rs | 55 ++ codex-rs/core/src/tasks/mod.rs | 1 - codex-rs/core/src/tools/handlers/mod.rs | 1 + .../src/tools/handlers/search_tool_bm25.rs | 32 +- codex-rs/core/src/tools/spec.rs | 7 +- .../templates/search_tool/tool_description.md | 10 +- .../core/tests/common/apps_test_server.rs | 158 ++++ codex-rs/core/tests/common/lib.rs | 1 + codex-rs/core/tests/suite/client.rs | 9 +- codex-rs/core/tests/suite/search_tool.rs | 870 ++++++++++++++---- 12 files changed, 1201 insertions(+), 201 deletions(-) create mode 100644 codex-rs/core/tests/common/apps_test_server.rs diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 10b070cf0..437fd9e80 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -150,6 +150,7 @@ use crate::mcp::with_codex_apps_mcp; use crate::mcp_connection_manager::McpConnectionManager; use crate::mcp_connection_manager::filter_codex_apps_mcp_tools_only; use crate::mcp_connection_manager::filter_mcp_tools_by_name; +use crate::mcp_connection_manager::filter_non_codex_apps_mcp_tools_only; use crate::memories; use crate::mentions::build_connector_slug_counts; use crate::mentions::build_skill_name_counts; @@ -223,6 +224,7 @@ use crate::tasks::SessionTask; use crate::tasks::SessionTaskContext; use crate::tools::ToolRouter; use crate::tools::context::SharedTurnDiffTracker; +use crate::tools::handlers::SEARCH_TOOL_BM25_TOOL_NAME; use crate::tools::js_repl::JsReplHandle; use crate::tools::network_approval::NetworkApprovalService; use crate::tools::network_approval::build_blocked_request_observer; @@ -1489,6 +1491,11 @@ impl Session { state.merge_mcp_tool_selection(tool_names) } + pub(crate) async fn set_mcp_tool_selection(&self, tool_names: Vec) { + let mut state = self.state.lock().await; + state.set_mcp_tool_selection(tool_names); + } + pub(crate) async fn get_mcp_tool_selection(&self) -> Option> { let state = self.state.lock().await; state.get_mcp_tool_selection() @@ -1522,6 +1529,7 @@ impl Session { async fn record_initial_history(&self, conversation_history: InitialHistory) { let turn_context = self.new_default_turn().await; + self.clear_mcp_tool_selection().await; match conversation_history { InitialHistory::New => { // Build and record initial items (user instructions + environment context) @@ -1537,6 +1545,8 @@ impl Session { } InitialHistory::Resumed(resumed_history) => { let rollout_items = resumed_history.history; + let restored_tool_selection = + Self::extract_mcp_tool_selection_from_rollout(&rollout_items); let previous_model = Self::last_rollout_model_name(&rollout_items) .map(std::string::ToString::to_string); { @@ -1578,12 +1588,17 @@ impl Session { let mut state = self.state.lock().await; state.set_token_info(Some(info)); } + if let Some(selected_tools) = restored_tool_selection { + self.set_mcp_tool_selection(selected_tools).await; + } // Defer seeding the session's initial context until the first turn starts so // turn/start overrides can be merged before we write to the rollout. self.flush_rollout().await; } InitialHistory::Forked(rollout_items) => { + let restored_tool_selection = + Self::extract_mcp_tool_selection_from_rollout(&rollout_items); let previous_model = Self::last_rollout_model_name(&rollout_items) .map(std::string::ToString::to_string); self.set_previous_model(previous_model).await; @@ -1603,6 +1618,9 @@ impl Session { let mut state = self.state.lock().await; state.set_token_info(Some(info)); } + if let Some(selected_tools) = restored_tool_selection { + self.set_mcp_tool_selection(selected_tools).await; + } // If persisting, persist all rollout items as-is (recorder filters) if !rollout_items.is_empty() { @@ -1644,6 +1662,54 @@ impl Session { }) } + fn extract_mcp_tool_selection_from_rollout( + rollout_items: &[RolloutItem], + ) -> Option> { + let mut search_call_ids = HashSet::new(); + let mut active_selected_tools: Option> = None; + + for item in rollout_items { + let RolloutItem::ResponseItem(response_item) = item else { + continue; + }; + match response_item { + ResponseItem::FunctionCall { name, call_id, .. } => { + if name == SEARCH_TOOL_BM25_TOOL_NAME { + search_call_ids.insert(call_id.clone()); + } + } + ResponseItem::FunctionCallOutput { call_id, output } => { + if !search_call_ids.contains(call_id) { + continue; + } + let Some(content) = output.body.to_text() else { + continue; + }; + let Ok(payload) = serde_json::from_str::(&content) else { + continue; + }; + let Some(selected_tools) = payload + .get("active_selected_tools") + .and_then(Value::as_array) + else { + continue; + }; + let Some(selected_tools) = selected_tools + .iter() + .map(|value| value.as_str().map(str::to_string)) + .collect::>>() + else { + continue; + }; + active_selected_tools = Some(selected_tools); + } + _ => {} + } + } + + active_selected_tools + } + async fn previous_model(&self) -> Option { let state = self.state.lock().await; state.previous_model() @@ -4899,6 +4965,10 @@ async fn built_tools( None }; + let app_tools = connectors + .as_ref() + .map(|connectors| filter_codex_apps_mcp_tools(&mcp_tools, connectors)); + if let Some(connectors) = connectors.as_ref() { let skill_name_counts_lower = skills_outcome.map_or_else(HashMap::new, |outcome| { build_skill_name_counts(&outcome.skills, &outcome.disabled_paths).1 @@ -4911,24 +4981,20 @@ async fn built_tools( &skill_name_counts_lower, ); - let mut selected_mcp_tools = - if let Some(selected_tools) = sess.get_mcp_tool_selection().await { - filter_mcp_tools_by_name(&mcp_tools, &selected_tools) - } else { - HashMap::new() - }; + let mut selected_mcp_tools = filter_non_codex_apps_mcp_tools_only(&mcp_tools); - let apps_mcp_tools = - filter_codex_apps_mcp_tools_only(&mcp_tools, explicitly_enabled.as_ref()); - selected_mcp_tools.extend(apps_mcp_tools); + if let Some(selected_tools) = sess.get_mcp_tool_selection().await { + selected_mcp_tools.extend(filter_mcp_tools_by_name(&mcp_tools, &selected_tools)); + } + + selected_mcp_tools.extend(filter_codex_apps_mcp_tools_only( + &mcp_tools, + explicitly_enabled.as_ref(), + )); mcp_tools = selected_mcp_tools; } - let app_tools = connectors - .as_ref() - .map(|connectors| filter_codex_apps_mcp_tools(&mcp_tools, connectors)); - Ok(Arc::new(ToolRouter::from_config( &turn_context.tools_config, Some( @@ -5789,6 +5855,22 @@ mod tests { } } + fn function_call_rollout_item(name: &str, call_id: &str) -> RolloutItem { + RolloutItem::ResponseItem(ResponseItem::FunctionCall { + id: None, + name: name.to_string(), + arguments: "{}".to_string(), + call_id: call_id.to_string(), + }) + } + + fn function_call_output_rollout_item(call_id: &str, output: &str) -> RolloutItem { + RolloutItem::ResponseItem(ResponseItem::FunctionCallOutput { + call_id: call_id.to_string(), + output: FunctionCallOutputPayload::from_text(output.to_string()), + }) + } + #[tokio::test] async fn get_base_instructions_no_user_content() { let prompt_with_apply_patch_instructions = @@ -5978,6 +6060,46 @@ mod tests { assert_eq!(connector_ids, HashSet::::new()); } + #[test] + fn non_app_mcp_tools_remain_visible_without_search_selection() { + let mcp_tools = HashMap::from([ + ( + "mcp__codex_apps__calendar_create_event".to_string(), + make_mcp_tool( + CODEX_APPS_MCP_SERVER_NAME, + "calendar_create_event", + Some("calendar"), + Some("Calendar"), + ), + ), + ( + "mcp__rmcp__echo".to_string(), + make_mcp_tool("rmcp", "echo", None, None), + ), + ]); + + let mut selected_mcp_tools = mcp_tools + .iter() + .filter(|(_, tool)| tool.server_name != CODEX_APPS_MCP_SERVER_NAME) + .map(|(name, tool)| (name.clone(), tool.clone())) + .collect::>(); + + let connectors = connectors::accessible_connectors_from_mcp_tools(&mcp_tools); + let explicitly_enabled_connectors = HashSet::new(); + let connectors = filter_connectors_for_input( + &connectors, + &[user_message("run echo")], + &explicitly_enabled_connectors, + &HashMap::new(), + ); + let apps_mcp_tools = filter_codex_apps_mcp_tools_only(&mcp_tools, &connectors); + selected_mcp_tools.extend(apps_mcp_tools); + + let mut tool_names: Vec = selected_mcp_tools.into_keys().collect(); + tool_names.sort(); + assert_eq!(tool_names, vec!["mcp__rmcp__echo".to_string()]); + } + #[test] fn search_tool_selection_keeps_codex_apps_tools_without_mentions() { let selected_tool_names = vec![ @@ -6065,6 +6187,106 @@ mod tests { ); } + #[test] + fn extract_mcp_tool_selection_from_rollout_reads_search_tool_output() { + let rollout_items = vec![ + function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-1"), + function_call_output_rollout_item( + "search-1", + &json!({ + "active_selected_tools": [ + "mcp__codex_apps__calendar_create_event", + "mcp__codex_apps__calendar_list_events", + ], + }) + .to_string(), + ), + ]; + + let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); + assert_eq!( + selected, + Some(vec![ + "mcp__codex_apps__calendar_create_event".to_string(), + "mcp__codex_apps__calendar_list_events".to_string(), + ]) + ); + } + + #[test] + fn extract_mcp_tool_selection_from_rollout_latest_valid_payload_wins() { + let rollout_items = vec![ + function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-1"), + function_call_output_rollout_item( + "search-1", + &json!({ + "active_selected_tools": ["mcp__codex_apps__calendar_create_event"], + }) + .to_string(), + ), + function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-2"), + function_call_output_rollout_item( + "search-2", + &json!({ + "active_selected_tools": ["mcp__codex_apps__calendar_delete_event"], + }) + .to_string(), + ), + ]; + + let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); + assert_eq!( + selected, + Some(vec!["mcp__codex_apps__calendar_delete_event".to_string(),]) + ); + } + + #[test] + fn extract_mcp_tool_selection_from_rollout_ignores_non_search_and_malformed_payloads() { + let rollout_items = vec![ + function_call_rollout_item("shell", "shell-1"), + function_call_output_rollout_item( + "shell-1", + &json!({ + "active_selected_tools": ["mcp__codex_apps__should_be_ignored"], + }) + .to_string(), + ), + function_call_rollout_item(SEARCH_TOOL_BM25_TOOL_NAME, "search-1"), + function_call_output_rollout_item("search-1", "{not-json"), + function_call_output_rollout_item( + "unknown-search-call", + &json!({ + "active_selected_tools": ["mcp__codex_apps__also_ignored"], + }) + .to_string(), + ), + function_call_output_rollout_item( + "search-1", + &json!({ + "active_selected_tools": ["mcp__codex_apps__calendar_list_events"], + }) + .to_string(), + ), + ]; + + let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); + assert_eq!( + selected, + Some(vec!["mcp__codex_apps__calendar_list_events".to_string(),]) + ); + } + + #[test] + fn extract_mcp_tool_selection_from_rollout_returns_none_without_valid_search_output() { + let rollout_items = vec![function_call_rollout_item( + SEARCH_TOOL_BM25_TOOL_NAME, + "search-1", + )]; + let selected = Session::extract_mcp_tool_selection_from_rollout(&rollout_items); + assert_eq!(selected, None); + } + #[tokio::test] async fn reconstruct_history_matches_live_compactions() { let (session, turn_context) = make_session_and_context().await; diff --git a/codex-rs/core/src/mcp_connection_manager.rs b/codex-rs/core/src/mcp_connection_manager.rs index 36f0cbb6c..5a3c4d6e5 100644 --- a/codex-rs/core/src/mcp_connection_manager.rs +++ b/codex-rs/core/src/mcp_connection_manager.rs @@ -908,6 +908,16 @@ pub(crate) fn filter_codex_apps_mcp_tools_only( .collect() } +pub(crate) fn filter_non_codex_apps_mcp_tools_only( + mcp_tools: &HashMap, +) -> HashMap { + mcp_tools + .iter() + .filter(|(_, tool)| tool.server_name != CODEX_APPS_MCP_SERVER_NAME) + .map(|(name, tool)| (name.clone(), tool.clone())) + .collect() +} + pub(crate) fn filter_mcp_tools_by_name( mcp_tools: &HashMap, selected_tools: &[String], diff --git a/codex-rs/core/src/state/session.rs b/codex-rs/core/src/state/session.rs index 70fa78a28..c86f0cdfd 100644 --- a/codex-rs/core/src/state/session.rs +++ b/codex-rs/core/src/state/session.rs @@ -170,6 +170,27 @@ impl SessionState { merged } + pub(crate) fn set_mcp_tool_selection(&mut self, tool_names: Vec) { + if tool_names.is_empty() { + self.active_mcp_tool_selection = None; + return; + } + + let mut selected = Vec::new(); + let mut seen = HashSet::new(); + for tool_name in tool_names { + if seen.insert(tool_name.clone()) { + selected.push(tool_name); + } + } + + self.active_mcp_tool_selection = if selected.is_empty() { + None + } else { + Some(selected) + }; + } + pub(crate) fn get_mcp_tool_selection(&self) -> Option> { self.active_mcp_tool_selection.clone() } @@ -293,6 +314,40 @@ mod tests { assert_eq!(state.get_mcp_tool_selection(), None); } + #[tokio::test] + async fn set_mcp_tool_selection_deduplicates_and_preserves_order() { + let session_configuration = make_session_configuration_for_tests().await; + let mut state = SessionState::new(session_configuration); + state.merge_mcp_tool_selection(vec!["mcp__rmcp__old".to_string()]); + + state.set_mcp_tool_selection(vec![ + "mcp__rmcp__echo".to_string(), + "mcp__rmcp__image".to_string(), + "mcp__rmcp__echo".to_string(), + "mcp__rmcp__search".to_string(), + ]); + + assert_eq!( + state.get_mcp_tool_selection(), + Some(vec![ + "mcp__rmcp__echo".to_string(), + "mcp__rmcp__image".to_string(), + "mcp__rmcp__search".to_string(), + ]) + ); + } + + #[tokio::test] + async fn set_mcp_tool_selection_empty_input_clears_selection() { + let session_configuration = make_session_configuration_for_tests().await; + let mut state = SessionState::new(session_configuration); + state.merge_mcp_tool_selection(vec!["mcp__rmcp__echo".to_string()]); + + state.set_mcp_tool_selection(Vec::new()); + + assert_eq!(state.get_mcp_tool_selection(), None); + } + #[tokio::test] // Verifies connector merging deduplicates repeated IDs. async fn merge_connector_selection_deduplicates_entries() { diff --git a/codex-rs/core/src/tasks/mod.rs b/codex-rs/core/src/tasks/mod.rs index e42315719..eb04619ee 100644 --- a/codex-rs/core/src/tasks/mod.rs +++ b/codex-rs/core/src/tasks/mod.rs @@ -120,7 +120,6 @@ impl Session { task: T, ) { self.abort_all_tasks(TurnAbortReason::Replaced).await; - self.clear_mcp_tool_selection().await; self.clear_connector_selection().await; self.seed_initial_context_if_needed(turn_context.as_ref()) .await; diff --git a/codex-rs/core/src/tools/handlers/mod.rs b/codex-rs/core/src/tools/handlers/mod.rs index d07d2e68b..9f4a1a16c 100644 --- a/codex-rs/core/src/tools/handlers/mod.rs +++ b/codex-rs/core/src/tools/handlers/mod.rs @@ -33,6 +33,7 @@ pub use read_file::ReadFileHandler; pub use request_user_input::RequestUserInputHandler; pub(crate) use request_user_input::request_user_input_tool_description; pub(crate) use search_tool_bm25::DEFAULT_LIMIT as SEARCH_TOOL_BM25_DEFAULT_LIMIT; +pub(crate) use search_tool_bm25::SEARCH_TOOL_BM25_TOOL_NAME; pub use search_tool_bm25::SearchToolBm25Handler; pub use shell::ShellCommandHandler; pub use shell::ShellHandler; diff --git a/codex-rs/core/src/tools/handlers/search_tool_bm25.rs b/codex-rs/core/src/tools/handlers/search_tool_bm25.rs index 37fb34267..e535abac9 100644 --- a/codex-rs/core/src/tools/handlers/search_tool_bm25.rs +++ b/codex-rs/core/src/tools/handlers/search_tool_bm25.rs @@ -10,7 +10,6 @@ use std::collections::HashMap; use std::collections::HashSet; use crate::connectors; -use crate::features::Feature; use crate::function_tool::FunctionCallError; use crate::mcp::CODEX_APPS_MCP_SERVER_NAME; use crate::mcp_connection_manager::ToolInfo; @@ -23,6 +22,7 @@ use crate::tools::registry::ToolKind; pub struct SearchToolBm25Handler; +pub(crate) const SEARCH_TOOL_BM25_TOOL_NAME: &str = "search_tool_bm25"; pub(crate) const DEFAULT_LIMIT: usize = 8; fn default_limit() -> usize { @@ -42,7 +42,6 @@ struct ToolEntry { server_name: String, title: Option, description: Option, - connector_id: Option, connector_name: Option, input_keys: Vec, search_text: String, @@ -66,7 +65,6 @@ impl ToolEntry { .tool .description .map(|description| description.to_string()), - connector_id: info.connector_id, connector_name: info.connector_name, input_keys, search_text, @@ -91,9 +89,9 @@ impl ToolHandler for SearchToolBm25Handler { let arguments = match payload { ToolPayload::Function { arguments } => arguments, _ => { - return Err(FunctionCallError::Fatal( - "search_tool_bm25 handler received unsupported payload".to_string(), - )); + return Err(FunctionCallError::Fatal(format!( + "{SEARCH_TOOL_BM25_TOOL_NAME} handler received unsupported payload" + ))); } }; @@ -120,15 +118,12 @@ impl ToolHandler for SearchToolBm25Handler { .await .list_all_tools() .await; - let mcp_tools = if turn.config.features.enabled(Feature::Apps) { - let connectors = connectors::with_app_enabled_state( - connectors::accessible_connectors_from_mcp_tools(&mcp_tools), - &turn.config, - ); - filter_codex_apps_mcp_tools(mcp_tools, &connectors) - } else { - mcp_tools - }; + + let connectors = connectors::with_app_enabled_state( + connectors::accessible_connectors_from_mcp_tools(&mcp_tools), + &turn.config, + ); + let mcp_tools = filter_codex_apps_mcp_tools(mcp_tools, &connectors); let mut entries: Vec = mcp_tools .into_iter() @@ -172,7 +167,6 @@ impl ToolHandler for SearchToolBm25Handler { "server": entry.server_name.clone(), "title": entry.title.clone(), "description": entry.description.clone(), - "connector_id": entry.connector_id.clone(), "connector_name": entry.connector_name.clone(), "input_keys": entry.input_keys.clone(), "score": result.score, @@ -243,12 +237,6 @@ fn build_search_text(name: &str, info: &ToolInfo, input_keys: &[String]) -> Stri parts.push(connector_name.to_string()); } - if let Some(connector_id) = info.connector_id.as_deref() - && !connector_id.trim().is_empty() - { - parts.push(connector_id.to_string()); - } - if !input_keys.is_empty() { parts.extend(input_keys.iter().cloned()); } diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 06181ae0f..20b27ccce 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -8,6 +8,7 @@ use crate::features::Features; use crate::mcp_connection_manager::ToolInfo; use crate::tools::handlers::PLAN_TOOL; use crate::tools::handlers::SEARCH_TOOL_BM25_DEFAULT_LIMIT; +use crate::tools::handlers::SEARCH_TOOL_BM25_TOOL_NAME; use crate::tools::handlers::apply_patch::create_apply_patch_freeform_tool; use crate::tools::handlers::apply_patch::create_apply_patch_json_tool; use crate::tools::handlers::collab::DEFAULT_WAIT_TIMEOUT_MS; @@ -913,7 +914,7 @@ fn create_search_tool_bm25_tool(app_tools: &HashMap) -> ToolSp SEARCH_TOOL_BM25_DESCRIPTION_TEMPLATE.replace("{{app_names}}", app_names.as_str()); ToolSpec::Function(ResponsesApiTool { - name: "search_tool_bm25".to_string(), + name: SEARCH_TOOL_BM25_TOOL_NAME.to_string(), description, strict: false, parameters: JsonSchema::Object { @@ -1507,7 +1508,7 @@ pub(crate) fn build_specs( && let Some(app_tools) = app_tools { builder.push_spec_with_parallel_support(create_search_tool_bm25_tool(&app_tools), true); - builder.register_handler("search_tool_bm25", search_tool_handler); + builder.register_handler(SEARCH_TOOL_BM25_TOOL_NAME, search_tool_handler); } if let Some(apply_patch_tool_type) = &config.apply_patch_tool_type { @@ -2579,7 +2580,7 @@ mod tests { ) .build(); - let search_tool = find_tool(&tools, "search_tool_bm25"); + let search_tool = find_tool(&tools, SEARCH_TOOL_BM25_TOOL_NAME); let ToolSpec::Function(ResponsesApiTool { description, .. }) = &search_tool.spec else { panic!("expected function tool"); }; diff --git a/codex-rs/core/templates/search_tool/tool_description.md b/codex-rs/core/templates/search_tool/tool_description.md index ce4268177..0c84a6f52 100644 --- a/codex-rs/core/templates/search_tool/tool_description.md +++ b/codex-rs/core/templates/search_tool/tool_description.md @@ -2,7 +2,7 @@ Searches over apps tool metadata with BM25 and exposes matching tools for the next model call. -When `search_tool_bm25` is available, apps tools (`mcp__codex_apps__...`) are hidden until you search for them. +MCP tools of the apps ({{app_names}}) are hidden until you search for them with this tool (`search_tool_bm25`). Follow this workflow: @@ -10,8 +10,8 @@ Follow this workflow: - `query` (required): focused terms that describe the capability you need. - `limit` (optional): maximum number of tools to return (default `8`). 2. Use the returned `tools` list to decide which Apps tools are relevant. -3. Matching tools are added to available `tools` and available for the remainder of the current turn. -4. Repeated searches in the same turn are additive: new matches are unioned into `tools`. +3. Matching tools are added to available `tools` and available for the remainder of the current session/thread. +4. Repeated searches in the same session/thread are additive: new matches are unioned into `tools`. Notes: - Core tools remain available without searching. @@ -23,8 +23,6 @@ Notes: - `title` - `description` - `connector_name` - - `connector_id` - input schema property keys (`input_keys`) -- Use `search_tool_bm25` when the user asks to work with one of apps ({{app_names}}) and the exact tool name is not already known. - If the needed app is already explicit in the prompt (for example an `apps://...` mention) or already present in the current `tools` list, you can call that tool directly. -- Do not use `search_tool_bm25` for non-apps/local tasks (filesystem, repo search, or shell-only workflows). +- Do not use `search_tool_bm25` for non-apps/local tasks (filesystem, repo search, or shell-only workflows) or anything not related to {{app_names}}. diff --git a/codex-rs/core/tests/common/apps_test_server.rs b/codex-rs/core/tests/common/apps_test_server.rs new file mode 100644 index 000000000..c562e390e --- /dev/null +++ b/codex-rs/core/tests/common/apps_test_server.rs @@ -0,0 +1,158 @@ +use anyhow::Result; +use serde_json::Value; +use serde_json::json; +use wiremock::Mock; +use wiremock::MockServer; +use wiremock::Request; +use wiremock::Respond; +use wiremock::ResponseTemplate; +use wiremock::matchers::method; +use wiremock::matchers::path; +use wiremock::matchers::path_regex; + +const CONNECTOR_ID: &str = "calendar"; +const CONNECTOR_NAME: &str = "Calendar"; +const PROTOCOL_VERSION: &str = "2025-11-25"; +const SERVER_NAME: &str = "codex-apps-test"; +const SERVER_VERSION: &str = "1.0.0"; + +#[derive(Clone)] +pub struct AppsTestServer { + pub chatgpt_base_url: String, +} + +impl AppsTestServer { + pub async fn mount(server: &MockServer) -> Result { + mount_oauth_metadata(server).await; + mount_streamable_http_json_rpc(server).await; + Ok(Self { + chatgpt_base_url: server.uri(), + }) + } +} + +async fn mount_oauth_metadata(server: &MockServer) { + Mock::given(method("GET")) + .and(path("/.well-known/oauth-authorization-server/mcp")) + .respond_with(ResponseTemplate::new(200).set_body_json(json!({ + "authorization_endpoint": format!("{}/oauth/authorize", server.uri()), + "token_endpoint": format!("{}/oauth/token", server.uri()), + "scopes_supported": [""], + }))) + .mount(server) + .await; +} + +async fn mount_streamable_http_json_rpc(server: &MockServer) { + Mock::given(method("POST")) + .and(path_regex("^/api/codex/apps/?$")) + .respond_with(CodexAppsJsonRpcResponder) + .mount(server) + .await; +} + +struct CodexAppsJsonRpcResponder; + +impl Respond for CodexAppsJsonRpcResponder { + fn respond(&self, request: &Request) -> ResponseTemplate { + let body: Value = match serde_json::from_slice(&request.body) { + Ok(body) => body, + Err(error) => { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": format!("invalid JSON-RPC body: {error}"), + })); + } + }; + + let Some(method) = body.get("method").and_then(Value::as_str) else { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": "missing method in JSON-RPC request", + })); + }; + + match method { + "initialize" => { + let id = body.get("id").cloned().unwrap_or(Value::Null); + let protocol_version = body + .pointer("/params/protocolVersion") + .and_then(Value::as_str) + .unwrap_or(PROTOCOL_VERSION); + ResponseTemplate::new(200).set_body_json(json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "protocolVersion": protocol_version, + "capabilities": { + "tools": { + "listChanged": true + } + }, + "serverInfo": { + "name": SERVER_NAME, + "version": SERVER_VERSION + } + } + })) + } + "notifications/initialized" => ResponseTemplate::new(202), + "tools/list" => { + let id = body.get("id").cloned().unwrap_or(Value::Null); + ResponseTemplate::new(200).set_body_json(json!({ + "jsonrpc": "2.0", + "id": id, + "result": { + "tools": [ + { + "name": "calendar_create_event", + "description": "Create a calendar event.", + "inputSchema": { + "type": "object", + "properties": { + "title": { "type": "string" }, + "starts_at": { "type": "string" }, + "timezone": { "type": "string" } + }, + "required": ["title", "starts_at"], + "additionalProperties": false + }, + "_meta": { + "connector_id": CONNECTOR_ID, + "connector_name": CONNECTOR_NAME + } + }, + { + "name": "calendar_list_events", + "description": "List calendar events.", + "inputSchema": { + "type": "object", + "properties": { + "query": { "type": "string" }, + "limit": { "type": "integer" } + }, + "additionalProperties": false + }, + "_meta": { + "connector_id": CONNECTOR_ID, + "connector_name": CONNECTOR_NAME + } + } + ], + "nextCursor": null + } + })) + } + method if method.starts_with("notifications/") => ResponseTemplate::new(202), + _ => { + let id = body.get("id").cloned().unwrap_or(Value::Null); + ResponseTemplate::new(200).set_body_json(json!({ + "jsonrpc": "2.0", + "id": id, + "error": { + "code": -32601, + "message": format!("method not found: {method}") + } + })) + } + } + } +} diff --git a/codex-rs/core/tests/common/lib.rs b/codex-rs/core/tests/common/lib.rs index 79bf9369e..d2f9c9ac5 100644 --- a/codex-rs/core/tests/common/lib.rs +++ b/codex-rs/core/tests/common/lib.rs @@ -12,6 +12,7 @@ use codex_utils_absolute_path::AbsolutePathBuf; use regex_lite::Regex; use std::path::PathBuf; +pub mod apps_test_server; pub mod context_snapshot; pub mod process; pub mod responses; diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 7c6799df9..bbdbc7323 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -34,6 +34,7 @@ use codex_protocol::models::ReasoningItemReasoningSummary; use codex_protocol::models::WebSearchAction; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::user_input::UserInput; +use core_test_support::apps_test_server::AppsTestServer; use core_test_support::load_default_config_for_test; use core_test_support::responses::ev_completed; use core_test_support::responses::ev_completed_with_tokens; @@ -671,6 +672,10 @@ async fn includes_user_instructions_message_in_request() { async fn includes_apps_guidance_as_developer_message_when_enabled() { skip_if_no_network!(); let server = MockServer::start().await; + let apps_server = AppsTestServer::mount(&server) + .await + .expect("mount apps MCP mock"); + let apps_base_url = apps_server.chatgpt_base_url.clone(); let resp_mock = mount_sse_once( &server, @@ -680,8 +685,10 @@ async fn includes_apps_guidance_as_developer_message_when_enabled() { let mut builder = test_codex() .with_auth(CodexAuth::from_api_key("Test API Key")) - .with_config(|config| { + .with_config(move |config| { config.features.enable(Feature::Apps); + config.features.disable(Feature::AppsMcpGateway); + config.chatgpt_base_url = apps_base_url; }); let codex = builder .build(&server) diff --git a/codex-rs/core/tests/suite/search_tool.rs b/codex-rs/core/tests/suite/search_tool.rs index d40c12a42..7d2de4eb9 100644 --- a/codex-rs/core/tests/suite/search_tool.rs +++ b/codex-rs/core/tests/suite/search_tool.rs @@ -1,14 +1,22 @@ #![cfg(not(target_os = "windows"))] #![allow(clippy::unwrap_used, clippy::expect_used)] +use std::sync::Arc; use std::time::Duration; use anyhow::Result; +use codex_core::CodexThread; +use codex_core::NewThread; +use codex_core::config::Config; use codex_core::config::types::McpServerConfig; use codex_core::config::types::McpServerTransportConfig; use codex_core::features::Feature; use codex_core::protocol::AskForApproval; +use codex_core::protocol::EventMsg; +use codex_core::protocol::Op; use codex_core::protocol::SandboxPolicy; +use codex_protocol::user_input::UserInput; +use core_test_support::apps_test_server::AppsTestServer; use core_test_support::responses::ResponsesRequest; use core_test_support::responses::ev_assistant_message; use core_test_support::responses::ev_completed; @@ -19,15 +27,24 @@ use core_test_support::responses::sse; use core_test_support::responses::start_mock_server; use core_test_support::skip_if_no_network; use core_test_support::stdio_server_bin; +use core_test_support::test_codex::TestCodexBuilder; use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; use pretty_assertions::assert_eq; use serde_json::Value; use serde_json::json; const SEARCH_TOOL_INSTRUCTION_SNIPPETS: [&str; 2] = [ - "apps tools (`mcp__codex_apps__...`) are hidden until you search for them.", - "Matching tools are added to available `tools` and available for the remainder of the current turn.", + "MCP tools of the apps (Calendar) are hidden until you search for them with this tool", + "Matching tools are added to available `tools` and available for the remainder of the current session/thread.", ]; +const SEARCH_TOOL_BM25_TOOL_NAME: &str = "search_tool_bm25"; +const CALENDAR_CREATE_TOOL: &str = "mcp__codex_apps__calendar_create_event"; +const CALENDAR_LIST_TOOL: &str = "mcp__codex_apps__calendar_list_events"; +const RMCP_ECHO_TOOL: &str = "mcp__rmcp__echo"; +const RMCP_IMAGE_TOOL: &str = "mcp__rmcp__image"; +const CALENDAR_CREATE_QUERY: &str = "create calendar event"; +const CALENDAR_LIST_QUERY: &str = "list calendar events"; fn tool_names(body: &Value) -> Vec { body.get("tools") @@ -51,7 +68,7 @@ fn search_tool_description(body: &Value) -> Option { .and_then(Value::as_array) .and_then(|tools| { tools.iter().find_map(|tool| { - if tool.get("name").and_then(Value::as_str) == Some("search_tool_bm25") { + if tool.get("name").and_then(Value::as_str) == Some(SEARCH_TOOL_BM25_TOOL_NAME) { tool.get("description") .and_then(Value::as_str) .map(str::to_string) @@ -65,9 +82,13 @@ fn search_tool_description(body: &Value) -> Option { fn search_tool_output_payload(request: &ResponsesRequest, call_id: &str) -> Value { let (content, _success) = request .function_call_output_content_and_success(call_id) - .expect("search_tool_bm25 function_call_output should be present"); - let content = content.expect("search_tool_bm25 output should include content"); - serde_json::from_str(&content).expect("search_tool_bm25 content should be valid JSON") + .unwrap_or_else(|| { + panic!("{SEARCH_TOOL_BM25_TOOL_NAME} function_call_output should be present") + }); + let content = content + .unwrap_or_else(|| panic!("{SEARCH_TOOL_BM25_TOOL_NAME} output should include content")); + serde_json::from_str(&content) + .unwrap_or_else(|_| panic!("{SEARCH_TOOL_BM25_TOOL_NAME} content should be valid JSON")) } fn active_selected_tools(payload: &Value) -> Vec { @@ -95,11 +116,70 @@ fn search_result_tools(payload: &Value) -> Vec<&Value> { .collect() } +fn rmcp_server_config(command: String) -> McpServerConfig { + McpServerConfig { + transport: McpServerTransportConfig::Stdio { + command, + args: Vec::new(), + env: None, + env_vars: Vec::new(), + cwd: None, + }, + enabled: true, + required: false, + disabled_reason: None, + startup_timeout_sec: Some(Duration::from_secs(10)), + tool_timeout_sec: None, + enabled_tools: None, + disabled_tools: None, + scopes: None, + } +} + +fn configure_apps_with_optional_rmcp( + config: &mut Config, + apps_base_url: &str, + rmcp_server_bin: Option, +) { + config.features.enable(Feature::Apps); + config.features.disable(Feature::AppsMcpGateway); + config.chatgpt_base_url = apps_base_url.to_string(); + if let Some(command) = rmcp_server_bin { + let mut servers = config.mcp_servers.get().clone(); + servers.insert("rmcp".to_string(), rmcp_server_config(command)); + config + .mcp_servers + .set(servers) + .expect("test mcp servers should accept any configuration"); + } +} + +fn configured_builder(apps_base_url: String, rmcp_server_bin: Option) -> TestCodexBuilder { + test_codex().with_config(move |config| { + configure_apps_with_optional_rmcp(config, apps_base_url.as_str(), rmcp_server_bin); + }) +} + +async fn submit_user_input(thread: &Arc, text: &str) -> Result<()> { + thread + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: text.to_string(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(thread, |event| matches!(event, EventMsg::TurnComplete(_))).await; + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn search_tool_flag_adds_tool() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; let mock = mount_sse_sequence( &server, vec![sse(vec![ @@ -110,9 +190,7 @@ async fn search_tool_flag_adds_tool() -> Result<()> { ) .await; - let mut builder = test_codex().with_config(|config| { - config.features.enable(Feature::Apps); - }); + let mut builder = configured_builder(apps_server.chatgpt_base_url.clone(), None); let test = builder.build(&server).await?; test.submit_turn_with_policies( @@ -125,8 +203,8 @@ async fn search_tool_flag_adds_tool() -> Result<()> { let body = mock.single_request().body_json(); let tools = tool_names(&body); assert!( - tools.iter().any(|name| name == "search_tool_bm25"), - "tools list should include search_tool_bm25 when enabled: {tools:?}" + tools.iter().any(|name| name == SEARCH_TOOL_BM25_TOOL_NAME), + "tools list should include {SEARCH_TOOL_BM25_TOOL_NAME} when enabled: {tools:?}" ); Ok(()) @@ -137,6 +215,7 @@ async fn search_tool_adds_discovery_instructions_to_tool_description() -> Result skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; let mock = mount_sse_sequence( &server, vec![sse(vec![ @@ -147,9 +226,7 @@ async fn search_tool_adds_discovery_instructions_to_tool_description() -> Result ) .await; - let mut builder = test_codex().with_config(|config| { - config.features.enable(Feature::Apps); - }); + let mut builder = configured_builder(apps_server.chatgpt_base_url.clone(), None); let test = builder.build(&server).await?; test.submit_turn_with_policies( @@ -160,23 +237,24 @@ async fn search_tool_adds_discovery_instructions_to_tool_description() -> Result .await?; let body = mock.single_request().body_json(); - let search_tool_description = - search_tool_description(&body).expect("search tool description should be present"); + let description = search_tool_description(&body).expect("search tool description should exist"); assert!( SEARCH_TOOL_INSTRUCTION_SNIPPETS .iter() - .all(|snippet| search_tool_description.contains(snippet)), - "search tool description should include search tool workflow: {search_tool_description:?}" + .all(|snippet| description.contains(snippet)), + "search tool description should include search tool workflow: {description:?}" ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn search_tool_hides_mcp_tools_without_search() -> Result<()> { +async fn search_tool_hides_apps_tools_without_search_but_keeps_non_app_tools_visible() -> Result<()> +{ skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; let mock = mount_sse_sequence( &server, vec![sse(vec![ @@ -188,34 +266,10 @@ async fn search_tool_hides_mcp_tools_without_search() -> Result<()> { .await; let rmcp_test_server_bin = stdio_server_bin()?; - let mut builder = test_codex().with_config(move |config| { - config.features.enable(Feature::Apps); - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - "rmcp".to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: None, - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - }, - ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); - }); + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); let test = builder.build(&server).await?; test.submit_turn_with_policies( @@ -228,35 +282,96 @@ async fn search_tool_hides_mcp_tools_without_search() -> Result<()> { let body = mock.single_request().body_json(); let tools = tool_names(&body); assert!( - tools.iter().any(|name| name == "search_tool_bm25"), - "tools list should include search_tool_bm25 when enabled: {tools:?}" + tools.iter().any(|name| name == SEARCH_TOOL_BM25_TOOL_NAME), + "tools list should include {SEARCH_TOOL_BM25_TOOL_NAME}: {tools:?}" ); assert!( - !tools.iter().any(|name| name == "mcp__rmcp__echo"), - "tools list should not include MCP tools before search: {tools:?}" + tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app MCP tools should remain visible in Apps mode: {tools:?}" ); assert!( - !tools.iter().any(|name| name == "mcp__rmcp__image"), - "tools list should not include MCP tools before search: {tools:?}" + tools.iter().any(|name| name == RMCP_IMAGE_TOOL), + "non-app MCP tools should remain visible in Apps mode: {tools:?}" + ); + assert!( + !tools.iter().any(|name| name == CALENDAR_CREATE_TOOL), + "apps tools should stay hidden before search/mention: {tools:?}" + ); + assert!( + !tools.iter().any(|name| name == CALENDAR_LIST_TOOL), + "apps tools should stay hidden before search/mention: {tools:?}" ); Ok(()) } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn search_tool_selection_persists_within_turn_and_resets_next_turn() -> Result<()> { +async fn explicit_app_mentions_expose_apps_tools_without_search() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; + let mock = mount_sse_sequence( + &server, + vec![sse(vec![ + ev_response_created("resp-1"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-1"), + ])], + ) + .await; + + let rmcp_test_server_bin = stdio_server_bin()?; + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); + let test = builder.build(&server).await?; + + test.submit_turn_with_policies( + "Use [$calendar](app://calendar) and then call tools.", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let body = mock.single_request().body_json(); + let tools = tool_names(&body); + assert!( + tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app tools should remain visible: {tools:?}" + ); + assert!( + tools.iter().any(|name| name == CALENDAR_CREATE_TOOL), + "apps tools should be available after explicit app mention: {tools:?}" + ); + assert!( + tools.iter().any(|name| name == CALENDAR_LIST_TOOL), + "apps tools should be available after explicit app mention: {tools:?}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn search_tool_selection_persists_across_turns() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; let call_id = "tool-search"; let args = json!({ - "query": "echo", + "query": CALENDAR_CREATE_QUERY, "limit": 1, }); let responses = vec![ sse(vec![ ev_response_created("resp-1"), - ev_function_call(call_id, "search_tool_bm25", &serde_json::to_string(&args)?), + ev_function_call( + call_id, + SEARCH_TOOL_BM25_TOOL_NAME, + &serde_json::to_string(&args)?, + ), ev_completed("resp-1"), ]), sse(vec![ @@ -271,38 +386,14 @@ async fn search_tool_selection_persists_within_turn_and_resets_next_turn() -> Re let mock = mount_sse_sequence(&server, responses).await; let rmcp_test_server_bin = stdio_server_bin()?; - let mut builder = test_codex().with_config(move |config| { - config.features.enable(Feature::Apps); - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - "rmcp".to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: None, - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - }, - ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); - }); + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); let test = builder.build(&server).await?; test.submit_turn_with_policies( - "find the echo tool", + "find calendar create tool", AskForApproval::Never, SandboxPolicy::DangerFullAccess, ) @@ -324,18 +415,12 @@ async fn search_tool_selection_persists_within_turn_and_resets_next_turn() -> Re let first_tools = tool_names(&requests[0].body_json()); assert!( - !first_tools.iter().any(|name| name == "mcp__rmcp__echo"), - "first request should not include MCP tools before search: {first_tools:?}" - ); - - let second_tools = tool_names(&requests[1].body_json()); - assert!( - !second_tools.iter().any(|name| name == "mcp__rmcp__echo"), - "second request should not include rmcp MCP tools after search: {second_tools:?}" + first_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app MCP tools should be available before search: {first_tools:?}" ); assert!( - !second_tools.iter().any(|name| name == "mcp__rmcp__image"), - "second request should not include rmcp MCP tools after search: {second_tools:?}" + !first_tools.iter().any(|name| name == CALENDAR_CREATE_TOOL), + "apps tools should not be visible before search: {first_tools:?}" ); let search_output_payload = search_tool_output_payload(&requests[1], call_id); @@ -350,18 +435,44 @@ async fn search_tool_selection_persists_within_turn_and_resets_next_turn() -> Re "search results should only include codex_apps tools: {search_output_payload:?}" ); } + + let selected_tools = active_selected_tools(&search_output_payload); assert!( - !active_selected_tools(&search_output_payload) + selected_tools + .iter() + .any(|tool| tool == CALENDAR_CREATE_TOOL), + "calendar create tool should be selected: {search_output_payload:?}" + ); + assert!( + !selected_tools .iter() .any(|tool_name| tool_name.starts_with("mcp__rmcp__")), "search should not add rmcp tools to active selection: {search_output_payload:?}" ); + let second_tools = tool_names(&requests[1].body_json()); + assert!( + second_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app MCP tools should remain visible after search: {second_tools:?}" + ); + for selected_tool in &selected_tools { + assert!( + second_tools.iter().any(|name| name == selected_tool), + "follow-up request should include selected tool {selected_tool:?}: {second_tools:?}" + ); + } + let third_tools = tool_names(&requests[2].body_json()); assert!( - !third_tools.iter().any(|name| name == "mcp__rmcp__echo"), - "third request should not include MCP tools after turn reset: {third_tools:?}" + third_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app MCP tools should remain visible on later turns: {third_tools:?}" ); + for selected_tool in &selected_tools { + assert!( + third_tools.iter().any(|name| name == selected_tool), + "subsequent turn should include selected tool {selected_tool:?}: {third_tools:?}" + ); + } Ok(()) } @@ -371,14 +482,15 @@ async fn search_tool_selection_unions_results_within_turn() -> Result<()> { skip_if_no_network!(Ok(())); let server = start_mock_server().await; - let first_call_id = "tool-search-echo"; - let second_call_id = "tool-search-image"; + let apps_server = AppsTestServer::mount(&server).await?; + let first_call_id = "tool-search-create"; + let second_call_id = "tool-search-list"; let first_args = json!({ - "query": "echo", + "query": CALENDAR_CREATE_QUERY, "limit": 1, }); let second_args = json!({ - "query": "image", + "query": CALENDAR_LIST_QUERY, "limit": 1, }); let responses = vec![ @@ -386,7 +498,7 @@ async fn search_tool_selection_unions_results_within_turn() -> Result<()> { ev_response_created("resp-1"), ev_function_call( first_call_id, - "search_tool_bm25", + SEARCH_TOOL_BM25_TOOL_NAME, &serde_json::to_string(&first_args)?, ), ev_completed("resp-1"), @@ -395,7 +507,7 @@ async fn search_tool_selection_unions_results_within_turn() -> Result<()> { ev_response_created("resp-2"), ev_function_call( second_call_id, - "search_tool_bm25", + SEARCH_TOOL_BM25_TOOL_NAME, &serde_json::to_string(&second_args)?, ), ev_completed("resp-2"), @@ -408,38 +520,14 @@ async fn search_tool_selection_unions_results_within_turn() -> Result<()> { let mock = mount_sse_sequence(&server, responses).await; let rmcp_test_server_bin = stdio_server_bin()?; - let mut builder = test_codex().with_config(move |config| { - config.features.enable(Feature::Apps); - let mut servers = config.mcp_servers.get().clone(); - servers.insert( - "rmcp".to_string(), - McpServerConfig { - transport: McpServerTransportConfig::Stdio { - command: rmcp_test_server_bin, - args: Vec::new(), - env: None, - env_vars: Vec::new(), - cwd: None, - }, - enabled: true, - required: false, - disabled_reason: None, - startup_timeout_sec: Some(Duration::from_secs(10)), - tool_timeout_sec: None, - enabled_tools: None, - disabled_tools: None, - scopes: None, - }, - ); - config - .mcp_servers - .set(servers) - .expect("test mcp servers should accept any configuration"); - }); + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); let test = builder.build(&server).await?; test.submit_turn_with_policies( - "find echo and image tools", + "find create and list calendar tools", AskForApproval::Never, SandboxPolicy::DangerFullAccess, ) @@ -455,28 +543,16 @@ async fn search_tool_selection_unions_results_within_turn() -> Result<()> { let first_tools = tool_names(&requests[0].body_json()); assert!( - !first_tools.iter().any(|name| name == "mcp__rmcp__echo"), - "first request should not include MCP tools before search: {first_tools:?}" - ); - - let second_tools = tool_names(&requests[1].body_json()); - assert!( - !second_tools.iter().any(|name| name == "mcp__rmcp__echo"), - "second request should not include rmcp tools after first search: {second_tools:?}" + first_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app MCP tools should be visible before search: {first_tools:?}" ); assert!( - !second_tools.iter().any(|name| name == "mcp__rmcp__image"), - "second request should not include rmcp tools after first search: {second_tools:?}" - ); - - let third_tools = tool_names(&requests[2].body_json()); - assert!( - !third_tools.iter().any(|name| name == "mcp__rmcp__echo"), - "third request should not include rmcp tools: {third_tools:?}" + !first_tools.iter().any(|name| name == CALENDAR_CREATE_TOOL), + "apps tools should be hidden before search: {first_tools:?}" ); assert!( - !third_tools.iter().any(|name| name == "mcp__rmcp__image"), - "third request should not include rmcp tools: {third_tools:?}" + !first_tools.iter().any(|name| name == CALENDAR_LIST_TOOL), + "apps tools should be hidden before search: {first_tools:?}" ); let second_search_payload = search_tool_output_payload(&requests[2], second_call_id); @@ -491,11 +567,495 @@ async fn search_tool_selection_unions_results_within_turn() -> Result<()> { "search results should only include codex_apps tools: {second_search_payload:?}" ); } + + let selected_tools = active_selected_tools(&second_search_payload); + assert_eq!( + selected_tools, + vec![ + CALENDAR_CREATE_TOOL.to_string(), + CALENDAR_LIST_TOOL.to_string(), + ], + "two searches in one turn should union selected apps tools" + ); + + let third_tools = tool_names(&requests[2].body_json()); assert!( - !active_selected_tools(&second_search_payload) - .iter() - .any(|tool_name| tool_name.starts_with("mcp__rmcp__")), - "search should not add rmcp tools to active selection: {second_search_payload:?}" + third_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app MCP tools should remain visible after repeated search: {third_tools:?}" + ); + assert!( + third_tools.iter().any(|name| name == CALENDAR_CREATE_TOOL), + "calendar create should be available after repeated search: {third_tools:?}" + ); + assert!( + third_tools.iter().any(|name| name == CALENDAR_LIST_TOOL), + "calendar list should be available after repeated search: {third_tools:?}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn search_tool_selection_restores_when_resumed() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; + let call_id = "tool-search"; + let args = json!({ + "query": CALENDAR_CREATE_QUERY, + "limit": 1, + }); + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + SEARCH_TOOL_BM25_TOOL_NAME, + &serde_json::to_string(&args)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + sse(vec![ + ev_response_created("resp-3"), + ev_assistant_message("msg-2", "resumed done"), + ev_completed("resp-3"), + ]), + ]; + let mock = mount_sse_sequence(&server, responses).await; + + let rmcp_test_server_bin = stdio_server_bin()?; + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin.clone()), + ); + let test = builder.build(&server).await?; + + let home = test.home.clone(); + let rollout_path = test + .session_configured + .rollout_path + .clone() + .expect("rollout path should be available for resume"); + + test.submit_turn_with_policies( + "find calendar tools", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 2, + "expected 2 requests after initial turn, got {}", + requests.len() + ); + let search_output_payload = search_tool_output_payload(&requests[1], call_id); + let selected_tools = active_selected_tools(&search_output_payload); + assert!( + selected_tools + .iter() + .any(|name| name == CALENDAR_CREATE_TOOL), + "search should select calendar create before resume: {search_output_payload:?}" + ); + + let mut resume_builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); + let resumed = resume_builder.resume(&server, home, rollout_path).await?; + resumed + .submit_turn_with_policies( + "hello after resume", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 3, + "expected 3 requests after resumed turn, got {}", + requests.len() + ); + let resumed_tools = tool_names(&requests[2].body_json()); + assert!( + resumed_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app tools should remain visible after resume: {resumed_tools:?}" + ); + for selected_tool in &selected_tools { + assert!( + resumed_tools.iter().any(|name| name == selected_tool), + "resumed request should include restored selected tool {selected_tool:?}: {resumed_tools:?}" + ); + } + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn search_tool_selection_union_restores_when_resumed_after_multiple_search_calls() +-> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; + let first_call_id = "tool-search-create"; + let second_call_id = "tool-search-list"; + let first_args = json!({ + "query": CALENDAR_CREATE_QUERY, + "limit": 1, + }); + let second_args = json!({ + "query": CALENDAR_LIST_QUERY, + "limit": 1, + }); + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + first_call_id, + SEARCH_TOOL_BM25_TOOL_NAME, + &serde_json::to_string(&first_args)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "first search done"), + ev_completed("resp-2"), + ]), + sse(vec![ + ev_response_created("resp-3"), + ev_function_call( + second_call_id, + SEARCH_TOOL_BM25_TOOL_NAME, + &serde_json::to_string(&second_args)?, + ), + ev_completed("resp-3"), + ]), + sse(vec![ + ev_response_created("resp-4"), + ev_assistant_message("msg-2", "second search done"), + ev_completed("resp-4"), + ]), + sse(vec![ + ev_response_created("resp-5"), + ev_assistant_message("msg-3", "resumed done"), + ev_completed("resp-5"), + ]), + ]; + let mock = mount_sse_sequence(&server, responses).await; + + let rmcp_test_server_bin = stdio_server_bin()?; + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin.clone()), + ); + let test = builder.build(&server).await?; + + let home = test.home.clone(); + let rollout_path = test + .session_configured + .rollout_path + .clone() + .expect("rollout path should be available for resume"); + + test.submit_turn_with_policies( + "find create calendar tool", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + test.submit_turn_with_policies( + "find list calendar tool", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 4, + "expected 4 requests before resume, got {}", + requests.len() + ); + + let first_search_payload = search_tool_output_payload(&requests[1], first_call_id); + let first_result_tools = search_result_tools(&first_search_payload); + assert_eq!( + first_result_tools.len(), + 1, + "first search should return exactly one tool: {first_search_payload:?}" + ); + assert_eq!( + first_result_tools[0].get("name").and_then(Value::as_str), + Some(CALENDAR_CREATE_TOOL), + "first search should return calendar create tool: {first_search_payload:?}" + ); + let first_selected_tools = active_selected_tools(&first_search_payload); + assert_eq!( + first_selected_tools, + vec![CALENDAR_CREATE_TOOL.to_string()], + "first search should only select create tool: {first_search_payload:?}" + ); + + let second_search_payload = search_tool_output_payload(&requests[3], second_call_id); + let second_result_tools = search_result_tools(&second_search_payload); + assert_eq!( + second_result_tools.len(), + 1, + "second search should return exactly one tool: {second_search_payload:?}" + ); + assert_eq!( + second_result_tools[0].get("name").and_then(Value::as_str), + Some(CALENDAR_LIST_TOOL), + "second search should return calendar list tool: {second_search_payload:?}" + ); + let second_selected_tools = active_selected_tools(&second_search_payload); + assert_eq!( + second_selected_tools, + vec![ + CALENDAR_CREATE_TOOL.to_string(), + CALENDAR_LIST_TOOL.to_string(), + ], + "multiple searches should persist union before resume: {second_search_payload:?}" + ); + + let mut resume_builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); + let resumed = resume_builder.resume(&server, home, rollout_path).await?; + resumed + .submit_turn_with_policies( + "hello after resume with union", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 5, + "expected 5 requests after resumed turn, got {}", + requests.len() + ); + + let resumed_tools = tool_names(&requests[4].body_json()); + assert!( + resumed_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app tools should remain visible after resume: {resumed_tools:?}" + ); + assert!( + resumed_tools + .iter() + .any(|name| name == CALENDAR_CREATE_TOOL), + "resumed turn should restore calendar create tool: {resumed_tools:?}" + ); + assert!( + resumed_tools.iter().any(|name| name == CALENDAR_LIST_TOOL), + "resumed turn should restore calendar list tool: {resumed_tools:?}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn search_tool_selection_restores_when_forked_with_full_history() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; + let call_id = "tool-search"; + let args = json!({ + "query": CALENDAR_CREATE_QUERY, + "limit": 1, + }); + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + SEARCH_TOOL_BM25_TOOL_NAME, + &serde_json::to_string(&args)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + sse(vec![ + ev_response_created("resp-3"), + ev_assistant_message("msg-2", "forked done"), + ev_completed("resp-3"), + ]), + ]; + let mock = mount_sse_sequence(&server, responses).await; + + let rmcp_test_server_bin = stdio_server_bin()?; + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); + let test = builder.build(&server).await?; + + test.submit_turn_with_policies( + "find calendar tools", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 2, + "expected 2 requests after initial turn, got {}", + requests.len() + ); + let search_output_payload = search_tool_output_payload(&requests[1], call_id); + let selected_tools = active_selected_tools(&search_output_payload); + assert!( + selected_tools + .iter() + .any(|name| name == CALENDAR_CREATE_TOOL), + "search should select calendar create: {search_output_payload:?}" + ); + + let rollout_path = test + .codex + .rollout_path() + .expect("rollout path should exist for fork"); + let NewThread { thread: forked, .. } = test + .thread_manager + .fork_thread(usize::MAX, test.config.clone(), rollout_path, false) + .await?; + submit_user_input(&forked, "hello after fork").await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 3, + "expected 3 requests after forked turn, got {}", + requests.len() + ); + let forked_tools = tool_names(&requests[2].body_json()); + assert!( + forked_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app tools should remain visible in forked thread: {forked_tools:?}" + ); + for selected_tool in &selected_tools { + assert!( + forked_tools.iter().any(|name| name == selected_tool), + "forked request should include restored selected tool {selected_tool:?}: {forked_tools:?}" + ); + } + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn search_tool_selection_drops_when_fork_excludes_search_turn() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let apps_server = AppsTestServer::mount(&server).await?; + let call_id = "tool-search"; + let args = json!({ + "query": CALENDAR_CREATE_QUERY, + "limit": 1, + }); + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call( + call_id, + SEARCH_TOOL_BM25_TOOL_NAME, + &serde_json::to_string(&args)?, + ), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + sse(vec![ + ev_response_created("resp-3"), + ev_assistant_message("msg-2", "forked done"), + ev_completed("resp-3"), + ]), + ]; + let mock = mount_sse_sequence(&server, responses).await; + + let rmcp_test_server_bin = stdio_server_bin()?; + let mut builder = configured_builder( + apps_server.chatgpt_base_url.clone(), + Some(rmcp_test_server_bin), + ); + let test = builder.build(&server).await?; + + test.submit_turn_with_policies( + "find calendar tools", + AskForApproval::Never, + SandboxPolicy::DangerFullAccess, + ) + .await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 2, + "expected 2 requests after initial turn, got {}", + requests.len() + ); + let search_output_payload = search_tool_output_payload(&requests[1], call_id); + let selected_tools = active_selected_tools(&search_output_payload); + assert!( + !selected_tools.is_empty(), + "search turn should produce selected tools: {search_output_payload:?}" + ); + + let rollout_path = test + .codex + .rollout_path() + .expect("rollout path should exist for fork"); + let NewThread { thread: forked, .. } = test + .thread_manager + .fork_thread(0, test.config.clone(), rollout_path, false) + .await?; + submit_user_input(&forked, "hello after fork").await?; + + let requests = mock.requests(); + assert_eq!( + requests.len(), + 3, + "expected 3 requests after forked turn, got {}", + requests.len() + ); + let forked_tools = tool_names(&requests[2].body_json()); + assert!( + forked_tools.iter().any(|name| name == RMCP_ECHO_TOOL), + "non-app tools should remain visible in forked thread: {forked_tools:?}" + ); + assert!( + !forked_tools + .iter() + .any(|name| name.starts_with("mcp__codex_apps__")), + "forked history without search turn should not restore apps tools: {forked_tools:?}" ); Ok(())