From 70eddad6b075f26f0f93c66f7ec9a4e49cdadc93 Mon Sep 17 00:00:00 2001 From: Channing Conger Date: Sat, 14 Mar 2026 01:58:43 -0700 Subject: [PATCH] dynamic tool calls: add param `exposeToContext` to optionally hide tool (#14501) This extends dynamic_tool_calls to allow us to hide a tool from the model context but still use it as part of the general tool calling runtime (e.g. from js_repl or code_mode). The option is exposed as `deferLoading` (default `false`); the legacy `exposeToContext` key is still accepted on input and is inverted into `deferLoading` when `deferLoading` itself is absent. --- .../schema/json/ClientRequest.json | 3 + .../codex_app_server_protocol.schemas.json | 3 + .../codex_app_server_protocol.v2.schemas.json | 3 + .../schema/json/v2/ThreadStartParams.json | 3 + .../schema/typescript/v2/DynamicToolSpec.ts | 2 +- .../app-server-protocol/src/protocol/v2.rs | 86 ++++++- codex-rs/app-server/README.md | 3 + .../app-server/src/codex_message_processor.rs | 3 + .../tests/suite/v2/dynamic_tools.rs | 75 +++++++ codex-rs/core/src/codex.rs | 18 +- codex-rs/core/src/tools/js_repl/mod_tests.rs | 75 +++++++ codex-rs/core/tests/suite/code_mode.rs | 211 +++++++++++++++++- codex-rs/core/tests/suite/sqlite_state.rs | 2 + codex-rs/protocol/src/dynamic_tools.rs | 94 +++++++- ...019_thread_dynamic_tools_defer_loading.sql | 2 + codex-rs/state/src/runtime/threads.rs | 9 +- 16 files changed, 578 insertions(+), 14 deletions(-) create mode 100644 codex-rs/state/migrations/0019_thread_dynamic_tools_defer_loading.sql diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index 2dd4409f2..6ccec6fe8 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -506,6 +506,9 @@ }, "DynamicToolSpec": { "properties": { + "deferLoading": { + "type": "boolean" + }, "description": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 57363c925..720f7b0e7 100644 --- 
a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -7050,6 +7050,9 @@ }, "DynamicToolSpec": { "properties": { + "deferLoading": { + "type": "boolean" + }, "description": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json index 29eb9cad5..25d688373 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.v2.schemas.json @@ -3651,6 +3651,9 @@ }, "DynamicToolSpec": { "properties": { + "deferLoading": { + "type": "boolean" + }, "description": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadStartParams.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadStartParams.json index eb718fc0c..b4391c7ab 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadStartParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadStartParams.json @@ -61,6 +61,9 @@ }, "DynamicToolSpec": { "properties": { + "deferLoading": { + "type": "boolean" + }, "description": { "type": "string" }, diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolSpec.ts b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolSpec.ts index 8b39793f3..18596e31b 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolSpec.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolSpec.ts @@ -3,4 +3,4 @@ // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. 
import type { JsonValue } from "../serde_json/JsonValue"; -export type DynamicToolSpec = { name: string, description: string, inputSchema: JsonValue, }; +export type DynamicToolSpec = { name: string, description: string, inputSchema: JsonValue, deferLoading?: boolean, }; diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 172f427f4..a074ae647 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -535,13 +535,48 @@ pub struct ToolsV2 { pub view_image: Option, } -#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[derive(Serialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct DynamicToolSpec { pub name: String, pub description: String, pub input_schema: JsonValue, + #[serde(default, skip_serializing_if = "std::ops::Not::not")] + pub defer_loading: bool, +} + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct DynamicToolSpecDe { + name: String, + description: String, + input_schema: JsonValue, + defer_loading: Option, + expose_to_context: Option, +} + +impl<'de> Deserialize<'de> for DynamicToolSpec { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let DynamicToolSpecDe { + name, + description, + input_schema, + defer_loading, + expose_to_context, + } = DynamicToolSpecDe::deserialize(deserializer)?; + + Ok(Self { + name, + description, + input_schema, + defer_loading: defer_loading + .unwrap_or_else(|| expose_to_context.map(|visible| !visible).unwrap_or(false)), + }) + } } #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS, ExperimentalApi)] @@ -7655,6 +7690,55 @@ mod tests { ); } + #[test] + fn dynamic_tool_spec_deserializes_defer_loading() { + let value = json!({ + "name": "lookup_ticket", + "description": "Fetch a ticket", + "inputSchema": { + "type": "object", + 
"properties": { + "id": { "type": "string" } + } + }, + "deferLoading": true, + }); + + let actual: DynamicToolSpec = serde_json::from_value(value).expect("deserialize"); + + assert_eq!( + actual, + DynamicToolSpec { + name: "lookup_ticket".to_string(), + description: "Fetch a ticket".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "id": { "type": "string" } + } + }), + defer_loading: true, + } + ); + } + + #[test] + fn dynamic_tool_spec_legacy_expose_to_context_inverts_to_defer_loading() { + let value = json!({ + "name": "lookup_ticket", + "description": "Fetch a ticket", + "inputSchema": { + "type": "object", + "properties": {} + }, + "exposeToContext": false, + }); + + let actual: DynamicToolSpec = serde_json::from_value(value).expect("deserialize"); + + assert!(actual.defer_loading); + } + #[test] fn thread_start_params_preserve_explicit_null_service_tier() { let params: ThreadStartParams = serde_json::from_value(json!({ "serviceTier": null })) diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index 2a64d3cf7..4dcf93bc2 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -205,6 +205,7 @@ Start a fresh thread when you need a new Codex conversation. { "name": "lookup_ticket", "description": "Fetch a ticket by id", + "deferLoading": true, "inputSchema": { "type": "object", "properties": { @@ -991,6 +992,8 @@ If the session approval policy uses `Granular` with `request_permissions: false` `dynamicTools` on `thread/start` and the corresponding `item/tool/call` request/response flow are experimental APIs. To enable them, set `initialize.params.capabilities.experimentalApi = true`. +Each dynamic tool may set `deferLoading`. When omitted, it defaults to `false`. Set it to `true` to keep the tool registered and callable by runtime features such as `js_repl`, while excluding it from the model-facing tool list sent on ordinary turns. 
+ When a dynamic tool is invoked during a turn, the server sends an `item/tool/call` JSON-RPC request to the client: ```json diff --git a/codex-rs/app-server/src/codex_message_processor.rs b/codex-rs/app-server/src/codex_message_processor.rs index 4583df6e4..716559e94 100644 --- a/codex-rs/app-server/src/codex_message_processor.rs +++ b/codex-rs/app-server/src/codex_message_processor.rs @@ -2007,6 +2007,7 @@ impl CodexMessageProcessor { name: tool.name, description: tool.description, input_schema: tool.input_schema, + defer_loading: tool.defer_loading, }) .collect() }; @@ -8185,6 +8186,7 @@ mod tests { name: "my_tool".to_string(), description: "test".to_string(), input_schema: json!({"type": "null"}), + defer_loading: false, }]; let err = validate_dynamic_tools(&tools).expect_err("invalid schema"); assert!(err.contains("my_tool"), "unexpected error: {err}"); @@ -8197,6 +8199,7 @@ mod tests { description: "test".to_string(), // Missing `type` is common; core sanitizes these to a supported schema. input_schema: json!({"properties": {}}), + defer_loading: false, }]; validate_dynamic_tools(&tools).expect("valid schema"); } diff --git a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs index 338593c46..0ab3f4723 100644 --- a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs +++ b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs @@ -61,6 +61,7 @@ async fn thread_start_injects_dynamic_tools_into_model_requests() -> Result<()> name: "demo_tool".to_string(), description: "Demo dynamic tool".to_string(), input_schema: input_schema.clone(), + defer_loading: false, }; // Thread start injects dynamic tools into the thread's tool registry. 
@@ -118,6 +119,78 @@ async fn thread_start_injects_dynamic_tools_into_model_requests() -> Result<()> Ok(()) } +#[tokio::test] +async fn thread_start_keeps_hidden_dynamic_tools_out_of_model_requests() -> Result<()> { + let responses = vec![create_final_assistant_message_sse_response("Done")?]; + let server = create_mock_responses_server_sequence_unchecked(responses).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let dynamic_tool = DynamicToolSpec { + name: "hidden_tool".to_string(), + description: "Hidden dynamic tool".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "city": { "type": "string" } + }, + "required": ["city"], + "additionalProperties": false, + }), + defer_loading: true, + }; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + dynamic_tools: Some(vec![dynamic_tool.clone()]), + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. 
} = to_response::(thread_resp)?; + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id, + input: vec![V2UserInput::Text { + text: "Hello".to_string(), + text_elements: Vec::new(), + }], + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + let _turn: TurnStartResponse = to_response::(turn_resp)?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let bodies = responses_bodies(&server).await?; + assert!( + bodies + .iter() + .all(|body| find_tool(body, &dynamic_tool.name).is_none()), + "hidden dynamic tool should not be sent to the model" + ); + + Ok(()) +} + /// Exercises the full dynamic tool call path (server request, client response, model output). #[tokio::test] async fn dynamic_tool_call_round_trip_sends_text_content_items_to_model() -> Result<()> { @@ -154,6 +227,7 @@ async fn dynamic_tool_call_round_trip_sends_text_content_items_to_model() -> Res "required": ["city"], "additionalProperties": false, }), + defer_loading: false, }; let thread_req = mcp @@ -322,6 +396,7 @@ async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<( "required": ["city"], "additionalProperties": false, }), + defer_loading: false, }; let thread_req = mcp diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 68bb4b438..4aaa1df28 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -6219,9 +6219,25 @@ fn build_prompt( turn_context: &TurnContext, base_instructions: BaseInstructions, ) -> Prompt { + let deferred_dynamic_tools = turn_context + .dynamic_tools + .iter() + .filter(|tool| tool.defer_loading) + .map(|tool| tool.name.as_str()) + .collect::>(); + let tools = if deferred_dynamic_tools.is_empty() { + router.model_visible_specs() + } else { + router + 
.model_visible_specs() + .into_iter() + .filter(|spec| !deferred_dynamic_tools.contains(spec.name())) + .collect() + }; + Prompt { input, - tools: router.model_visible_specs(), + tools, parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls, base_instructions, personality: turn_context.personality, diff --git a/codex-rs/core/src/tools/js_repl/mod_tests.rs b/codex-rs/core/src/tools/js_repl/mod_tests.rs index 48fcbe1a0..db23072ef 100644 --- a/codex-rs/core/src/tools/js_repl/mod_tests.rs +++ b/codex-rs/core/src/tools/js_repl/mod_tests.rs @@ -1851,6 +1851,7 @@ async fn js_repl_emit_image_rejects_mixed_content() -> anyhow::Result<()> { "properties": {}, "additionalProperties": false }), + defer_loading: false, }]) .await; if !turn @@ -1949,6 +1950,7 @@ async fn js_repl_dynamic_tool_response_preserves_js_line_separator_text() -> any "properties": {}, "additionalProperties": false }), + defer_loading: false, }]) .await; @@ -2008,6 +2010,79 @@ console.log(text); Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn js_repl_can_call_hidden_dynamic_tools() -> anyhow::Result<()> { + if !can_run_js_repl_runtime_tests().await { + return Ok(()); + } + + let (session, turn, rx_event) = + make_session_and_context_with_dynamic_tools_and_rx(vec![DynamicToolSpec { + name: "hidden_dynamic_tool".to_string(), + description: "A hidden dynamic tool.".to_string(), + input_schema: serde_json::json!({ + "type": "object", + "properties": { + "city": { "type": "string" } + }, + "required": ["city"], + "additionalProperties": false + }), + defer_loading: true, + }]) + .await; + + *session.active_turn.lock().await = Some(crate::state::ActiveTurn::default()); + + let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::default())); + let manager = turn.js_repl.manager().await?; + let code = r#" +const out = await codex.tool("hidden_dynamic_tool", { city: "Paris" }); +console.log(JSON.stringify(out)); +"#; + + let session_for_response = 
Arc::clone(&session); + let response_watcher = async move { + loop { + let event = tokio::time::timeout(Duration::from_secs(2), rx_event.recv()).await??; + if let EventMsg::DynamicToolCallRequest(request) = event.msg { + session_for_response + .notify_dynamic_tool_response( + &request.call_id, + DynamicToolResponse { + content_items: vec![DynamicToolCallOutputContentItem::InputText { + text: "hidden-ok".to_string(), + }], + success: true, + }, + ) + .await; + return Ok::<(), anyhow::Error>(()); + } + } + }; + + let (result, response_watcher_result) = tokio::join!( + manager.execute( + Arc::clone(&session), + Arc::clone(&turn), + tracker, + JsReplArgs { + code: code.to_string(), + timeout_ms: Some(15_000), + }, + ), + response_watcher, + ); + + let result = result?; + response_watcher_result?; + assert!(result.output.contains("hidden-ok")); + assert!(session.get_pending_input().await.is_empty()); + + Ok(()) +} + #[tokio::test] async fn js_repl_prefers_env_node_module_dirs_over_config() -> anyhow::Result<()> { if !can_run_js_repl_runtime_tests().await { diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index 08867c514..1ead4cb89 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -4,6 +4,14 @@ use anyhow::Result; use codex_core::config::types::McpServerConfig; use codex_core::config::types::McpServerTransportConfig; use codex_core::features::Feature; +use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem; +use codex_protocol::dynamic_tools::DynamicToolResponse; +use codex_protocol::dynamic_tools::DynamicToolSpec; +use codex_protocol::protocol::AskForApproval; +use codex_protocol::protocol::EventMsg; +use codex_protocol::protocol::Op; +use codex_protocol::protocol::SandboxPolicy; +use codex_protocol::user_input::UserInput; use core_test_support::assert_regex_match; use core_test_support::responses; use core_test_support::responses::ResponseMock; @@ -17,6 +25,8 @@ 
use core_test_support::skip_if_no_network; use core_test_support::stdio_server_bin; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use core_test_support::wait_for_event_match; use pretty_assertions::assert_eq; use serde_json::Value; use std::collections::HashMap; @@ -91,18 +101,39 @@ fn custom_tool_output_body_and_success( req: &ResponsesRequest, call_id: &str, ) -> (String, Option) { - let (_, success) = req + let (content, success) = req .custom_tool_call_output_content_and_success(call_id) .expect("custom tool output should be present"); let items = custom_tool_output_items(req, call_id); - let output = items + let text_items = items .iter() - .skip(1) .filter_map(|item| item.get("text").and_then(Value::as_str)) - .collect(); + .collect::>(); + let output = match text_items.as_slice() { + [] => content.unwrap_or_default(), + [only] => (*only).to_string(), + [_, rest @ ..] => rest.concat(), + }; (output, success) } +fn custom_tool_output_last_non_empty_text(req: &ResponsesRequest, call_id: &str) -> Option { + match req.custom_tool_call_output(call_id).get("output") { + Some(Value::String(text)) if !text.trim().is_empty() => Some(text.clone()), + Some(Value::Array(items)) => items + .iter() + .filter_map(|item| item.get("text").and_then(Value::as_str)) + .rfind(|text| !text.trim().is_empty()) + .map(str::to_string), + Some(Value::String(_)) + | Some(Value::Object(_)) + | Some(Value::Number(_)) + | Some(Value::Bool(_)) + | Some(Value::Null) + | None => None, + } +} + async fn run_code_mode_turn( server: &MockServer, prompt: &str, @@ -1506,6 +1537,10 @@ text({ json: true }); let req = second_mock.single_request(); let (output, success) = custom_tool_output_body_and_success(&req, "call-1"); + eprintln!( + "hidden dynamic tool raw output: {}", + req.custom_tool_call_output("call-1") + ); assert_ne!( success, Some(false), @@ -1920,7 +1955,10 @@ text(JSON.stringify(tool)); "exec 
ALL_TOOLS lookup failed unexpectedly: {output}" ); - let parsed: Value = serde_json::from_str(&output)?; + let parsed: Value = serde_json::from_str( + &custom_tool_output_last_non_empty_text(&req, "call-1") + .expect("exec ALL_TOOLS lookup should emit JSON"), + )?; assert_eq!( parsed, serde_json::json!({ @@ -1955,7 +1993,10 @@ text(JSON.stringify(tool)); "exec ALL_TOOLS MCP lookup failed unexpectedly: {output}" ); - let parsed: Value = serde_json::from_str(&output)?; + let parsed: Value = serde_json::from_str( + &custom_tool_output_last_non_empty_text(&req, "call-1") + .expect("exec ALL_TOOLS MCP lookup should emit JSON"), + )?; assert_eq!( parsed, serde_json::json!({ @@ -1967,6 +2008,159 @@ text(JSON.stringify(tool)); Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_call_hidden_dynamic_tools() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let mut builder = test_codex().with_config(move |config| { + let _ = config.features.enable(Feature::CodeMode); + }); + let base_test = builder.build(&server).await?; + let new_thread = base_test + .thread_manager + .start_thread_with_tools( + base_test.config.clone(), + vec![DynamicToolSpec { + name: "hidden_dynamic_tool".to_string(), + description: "A hidden dynamic tool.".to_string(), + input_schema: serde_json::json!({ + "type": "object", + "properties": { + "city": { "type": "string" } + }, + "required": ["city"], + "additionalProperties": false, + }), + defer_loading: true, + }], + false, + ) + .await?; + let test = TestCodex { + home: base_test.home, + cwd: base_test.cwd, + codex: new_thread.thread, + session_configured: new_thread.session_configured, + config: base_test.config, + thread_manager: base_test.thread_manager, + }; + + let code = r#" +import { ALL_TOOLS, hidden_dynamic_tool } from "tools.js"; + +const tool = ALL_TOOLS.find(({ name }) => name === "hidden_dynamic_tool"); +const out = await hidden_dynamic_tool({ 
city: "Paris" }); +text( + JSON.stringify({ + name: tool?.name ?? null, + description: tool?.description ?? null, + out, + }) +); +"#; + + responses::mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_custom_tool_call("call-1", "exec", code), + ev_completed("resp-1"), + ]), + ) + .await; + + let second_mock = responses::mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + test.codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "use exec to inspect and call hidden tools".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: test.cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: test.session_configured.model.clone(), + effort: None, + summary: None, + service_tier: None, + collaboration_mode: None, + personality: None, + }) + .await?; + + let turn_id = wait_for_event_match(&test.codex, |event| match event { + EventMsg::TurnStarted(event) => Some(event.turn_id.clone()), + _ => None, + }) + .await; + let request = wait_for_event_match(&test.codex, |event| match event { + EventMsg::DynamicToolCallRequest(request) => Some(request.clone()), + _ => None, + }) + .await; + assert_eq!(request.tool, "hidden_dynamic_tool"); + assert_eq!(request.arguments, serde_json::json!({ "city": "Paris" })); + test.codex + .submit(Op::DynamicToolResponse { + id: request.call_id, + response: DynamicToolResponse { + content_items: vec![DynamicToolCallOutputContentItem::InputText { + text: "hidden-ok".to_string(), + }], + success: true, + }, + }) + .await?; + wait_for_event(&test.codex, |event| match event { + EventMsg::TurnComplete(event) => event.turn_id == turn_id, + _ => false, + }) + .await; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_body_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "exec 
hidden dynamic tool call failed unexpectedly: {output}" + ); + + let parsed: Value = serde_json::from_str( + &custom_tool_output_last_non_empty_text(&req, "call-1") + .expect("exec hidden dynamic tool lookup should emit JSON"), + )?; + assert_eq!( + parsed.get("name"), + Some(&Value::String("hidden_dynamic_tool".to_string())) + ); + assert_eq!( + parsed.get("out"), + Some(&Value::String("hidden-ok".to_string())) + ); + assert!( + parsed + .get("description") + .and_then(Value::as_str) + .is_some_and(|description| { + description.contains("A hidden dynamic tool.") + && description.contains("declare const tools:") + && description.contains("hidden_dynamic_tool(args:") + }) + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()> { skip_if_no_network!(Ok(())); @@ -2130,7 +2324,10 @@ text(JSON.stringify(load("nb"))); Some(false), "exec load call failed unexpectedly: {second_output}" ); - let loaded: Value = serde_json::from_str(&second_output)?; + let loaded: Value = serde_json::from_str( + &custom_tool_output_last_non_empty_text(&second_request, "call-2") + .expect("exec load call should emit JSON"), + )?; assert_eq!( loaded, serde_json::json!({ "title": "Notebook", "items": [1, true, null] }) diff --git a/codex-rs/core/tests/suite/sqlite_state.rs b/codex-rs/core/tests/suite/sqlite_state.rs index b17219e5f..0252f3e08 100644 --- a/codex-rs/core/tests/suite/sqlite_state.rs +++ b/codex-rs/core/tests/suite/sqlite_state.rs @@ -110,6 +110,7 @@ async fn backfill_scans_existing_rollouts() -> Result<()> { "required": ["city"], "properties": { "city": { "type": "string" } } }), + defer_loading: true, }, DynamicToolSpec { name: "weather_lookup".to_string(), @@ -119,6 +120,7 @@ async fn backfill_scans_existing_rollouts() -> Result<()> { "required": ["zip"], "properties": { "zip": { "type": "string" } } }), + defer_loading: false, }, ]; let dynamic_tools_for_hook = 
dynamic_tools.clone(); diff --git a/codex-rs/protocol/src/dynamic_tools.rs b/codex-rs/protocol/src/dynamic_tools.rs index 8b5405f30..8572bb5e8 100644 --- a/codex-rs/protocol/src/dynamic_tools.rs +++ b/codex-rs/protocol/src/dynamic_tools.rs @@ -1,15 +1,18 @@ use schemars::JsonSchema; use serde::Deserialize; +use serde::Deserializer; use serde::Serialize; use serde_json::Value as JsonValue; use ts_rs::TS; -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] +#[derive(Debug, Clone, Serialize, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct DynamicToolSpec { pub name: String, pub description: String, pub input_schema: JsonValue, + #[serde(default)] + pub defer_loading: bool, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] @@ -37,3 +40,92 @@ pub enum DynamicToolCallOutputContentItem { #[serde(rename_all = "camelCase")] InputImage { image_url: String }, } + +#[derive(Deserialize)] +#[serde(rename_all = "camelCase")] +struct DynamicToolSpecDe { + name: String, + description: String, + input_schema: JsonValue, + defer_loading: Option, + expose_to_context: Option, +} + +impl<'de> Deserialize<'de> for DynamicToolSpec { + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + let DynamicToolSpecDe { + name, + description, + input_schema, + defer_loading, + expose_to_context, + } = DynamicToolSpecDe::deserialize(deserializer)?; + + Ok(Self { + name, + description, + input_schema, + defer_loading: defer_loading + .unwrap_or_else(|| expose_to_context.map(|visible| !visible).unwrap_or(false)), + }) + } +} + +#[cfg(test)] +mod tests { + use super::DynamicToolSpec; + use pretty_assertions::assert_eq; + use serde_json::json; + + #[test] + fn dynamic_tool_spec_deserializes_defer_loading() { + let value = json!({ + "name": "lookup_ticket", + "description": "Fetch a ticket", + "inputSchema": { + "type": "object", + "properties": { + "id": { "type": "string" } + } + }, + 
"deferLoading": true, + }); + + let actual: DynamicToolSpec = serde_json::from_value(value).expect("deserialize"); + + assert_eq!( + actual, + DynamicToolSpec { + name: "lookup_ticket".to_string(), + description: "Fetch a ticket".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "id": { "type": "string" } + } + }), + defer_loading: true, + } + ); + } + + #[test] + fn dynamic_tool_spec_legacy_expose_to_context_inverts_to_defer_loading() { + let value = json!({ + "name": "lookup_ticket", + "description": "Fetch a ticket", + "inputSchema": { + "type": "object", + "properties": {} + }, + "exposeToContext": false, + }); + + let actual: DynamicToolSpec = serde_json::from_value(value).expect("deserialize"); + + assert!(actual.defer_loading); + } +} diff --git a/codex-rs/state/migrations/0019_thread_dynamic_tools_defer_loading.sql b/codex-rs/state/migrations/0019_thread_dynamic_tools_defer_loading.sql new file mode 100644 index 000000000..4ab59463a --- /dev/null +++ b/codex-rs/state/migrations/0019_thread_dynamic_tools_defer_loading.sql @@ -0,0 +1,2 @@ +ALTER TABLE thread_dynamic_tools +ADD COLUMN defer_loading INTEGER NOT NULL DEFAULT 0; diff --git a/codex-rs/state/src/runtime/threads.rs b/codex-rs/state/src/runtime/threads.rs index 344a89364..7d63776e2 100644 --- a/codex-rs/state/src/runtime/threads.rs +++ b/codex-rs/state/src/runtime/threads.rs @@ -50,7 +50,7 @@ WHERE id = ? ) -> anyhow::Result>> { let rows = sqlx::query( r#" -SELECT name, description, input_schema +SELECT name, description, input_schema, defer_loading FROM thread_dynamic_tools WHERE thread_id = ? ORDER BY position ASC @@ -70,6 +70,7 @@ ORDER BY position ASC name: row.try_get("name")?, description: row.try_get("description")?, input_schema, + defer_loading: row.try_get("defer_loading")?, }); } Ok(Some(tools)) @@ -425,8 +426,9 @@ INSERT INTO thread_dynamic_tools ( position, name, description, - input_schema -) VALUES (?, ?, ?, ?, ?) 
+ input_schema, + defer_loading +) VALUES (?, ?, ?, ?, ?, ?) ON CONFLICT(thread_id, position) DO NOTHING "#, ) @@ -435,6 +437,7 @@ ON CONFLICT(thread_id, position) DO NOTHING .bind(tool.name.as_str()) .bind(tool.description.as_str()) .bind(input_schema) + .bind(tool.defer_loading) .execute(&mut *tx) .await?; }