From ee8f84153efd90d06c7d6f7f3f3eb1ed3a09d9f7 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Tue, 10 Mar 2026 15:25:19 -0700 Subject: [PATCH] Add output schema to MCP tools and expose MCP tool results in code mode (#14236) Summary - drop `McpToolOutput` in favor of `CallToolResult`, moving its helpers to keep MCP tooling focused on the final result shape - wire the new schema definitions through code mode, context, handlers, and spec modules so MCP tools serialize the exact output shape expected by the model - extend code mode tests to cover multiple MCP call scenarios and ensure the serialized data matches the new schema - refresh JS runner helpers and protocol models alongside the schema changes Testing - Not run (not requested) --- codex-rs/core/src/codex_tests.rs | 9 +- codex-rs/core/src/mcp_tool_call.rs | 11 +- codex-rs/core/src/tools/code_mode.rs | 134 ++++++++--- codex-rs/core/src/tools/code_mode_bridge.js | 2 +- codex-rs/core/src/tools/code_mode_runner.cjs | 114 +++++++-- codex-rs/core/src/tools/context.rs | 65 ++++- codex-rs/core/src/tools/handlers/mcp.rs | 4 +- codex-rs/core/src/tools/js_repl/mod.rs | 4 +- codex-rs/core/src/tools/parallel.rs | 6 +- codex-rs/core/src/tools/spec.rs | 150 +++++++++++- codex-rs/core/tests/suite/code_mode.rs | 236 +++++++++++++++++++ codex-rs/protocol/src/models.rs | 61 +---- 12 files changed, 659 insertions(+), 137 deletions(-) diff --git a/codex-rs/core/src/codex_tests.rs b/codex-rs/core/src/codex_tests.rs index 311fc8fd3..7a17bdd98 100644 --- a/codex-rs/core/src/codex_tests.rs +++ b/codex-rs/core/src/codex_tests.rs @@ -58,7 +58,6 @@ use codex_app_server_protocol::AppInfo; use codex_otel::TelemetryAuthMode; use codex_protocol::models::BaseInstructions; use codex_protocol::models::ContentItem; -use codex_protocol::models::McpToolOutput; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; use codex_protocol::openai_models::ModelsResponse; @@ -1628,7 +1627,7 @@ fn prefers_structured_content_when_present() { meta: None, }; - let got = McpToolOutput::from(&ctr).into_function_call_output_payload(); + let got = ctr.into_function_call_output_payload(); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&json!({ @@ -1710,7 +1709,7 @@ fn falls_back_to_content_when_structured_is_null() { meta: None, }; - let got = McpToolOutput::from(&ctr).into_function_call_output_payload(); + let got = ctr.into_function_call_output_payload(); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&vec![text_block("hello"), text_block("world")]).unwrap(), @@ -1730,7 +1729,7 @@ fn success_flag_reflects_is_error_true() { meta: None, }; - let got = McpToolOutput::from(&ctr).into_function_call_output_payload(); + let got = ctr.into_function_call_output_payload(); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&json!({ "message": "bad" })).unwrap(), @@ -1750,7 +1749,7 @@ fn success_flag_true_with_no_error_and_content_used() { meta: None, }; - let got = McpToolOutput::from(&ctr).into_function_call_output_payload(); + let got = ctr.into_function_call_output_payload(); let expected = FunctionCallOutputPayload { body: FunctionCallOutputBody::Text( serde_json::to_string(&vec![text_block("alpha")]).unwrap(), diff --git a/codex-rs/core/src/mcp_tool_call.rs b/codex-rs/core/src/mcp_tool_call.rs index a9e4a06c8..629f2afe5 100644 --- a/codex-rs/core/src/mcp_tool_call.rs +++ b/codex-rs/core/src/mcp_tool_call.rs @@ -32,7 +32,6 @@ use crate::protocol::McpToolCallBeginEvent; use crate::protocol::McpToolCallEndEvent; use crate::state_db; use codex_protocol::mcp::CallToolResult; -use codex_protocol::models::McpToolOutput; use codex_protocol::openai_models::InputModality; use codex_protocol::protocol::AskForApproval; use codex_protocol::protocol::ReviewDecision; @@ -59,7 +58,7 @@ pub(crate) async fn handle_mcp_tool_call( server: String, tool_name: String, arguments: String, -) -> McpToolOutput { +) -> CallToolResult { // Parse the `arguments` as JSON. An empty string is OK, but invalid JSON // is not. let arguments_value = if arguments.trim().is_empty() { @@ -69,7 +68,7 @@ pub(crate) async fn handle_mcp_tool_call( Ok(value) => Some(value), Err(e) => { error!("failed to parse tool call arguments: {e}"); - return McpToolOutput::from_error_text(format!("err: {e}")); + return CallToolResult::from_error_text(format!("err: {e}")); } } }; @@ -113,7 +112,7 @@ pub(crate) async fn handle_mcp_tool_call( turn_context .session_telemetry .counter("codex.mcp.call", 1, &[("status", status)]); - return McpToolOutput::from_result(result); + return CallToolResult::from_result(result); } if let Some(decision) = maybe_request_mcp_tool_approval( @@ -217,7 +216,7 @@ pub(crate) async fn handle_mcp_tool_call( .session_telemetry .counter("codex.mcp.call", 1, &[("status", status)]); - return McpToolOutput::from_result(result); + return CallToolResult::from_result(result); } let tool_call_begin_event = EventMsg::McpToolCallBegin(McpToolCallBeginEvent { @@ -263,7 +262,7 @@ pub(crate) async fn handle_mcp_tool_call( .session_telemetry .counter("codex.mcp.call", 1, &[("status", status)]); - McpToolOutput::from_result(result) + CallToolResult::from_result(result) } async fn maybe_mark_thread_memory_mode_polluted(sess: &Session, turn_context: &TurnContext) { diff --git a/codex-rs/core/src/tools/code_mode.rs b/codex-rs/core/src/tools/code_mode.rs index 7fef60f68..d9a42ead4 100644 --- a/codex-rs/core/src/tools/code_mode.rs +++ b/codex-rs/core/src/tools/code_mode.rs @@ -42,6 +42,8 @@ enum CodeModeToolKind { #[derive(Clone, Debug, Serialize)] struct EnabledTool { + tool_name: String, + namespace: Vec, name: String, kind: CodeModeToolKind, } @@ -85,7 +87,7 @@ pub(crate) fn instructions(config: &Config) -> Option { section.push_str("- `code_mode` is a freeform/custom tool. Direct `code_mode` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n"); section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n"); section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n"); - section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n"); + section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/.js`; MCP tools use `tools/mcp/.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n"); section.push_str( "- Function tools require JSON object arguments. Freeform tools require raw strings.\n", ); @@ -106,7 +108,7 @@ pub(crate) async fn execute( turn, tracker, }; - let enabled_tools = build_enabled_tools(&exec); + let enabled_tools = build_enabled_tools(&exec).await; let source = build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?; execute_node(exec, source, enabled_tools) .await @@ -259,26 +261,72 @@ fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result Vec { +async fn build_enabled_tools(exec: &ExecContext) -> Vec { + let router = build_nested_router(exec).await; + let mcp_tool_names = exec + .session + .services + .mcp_connection_manager + .read() + .await + .list_all_tools() + .await + .into_iter() + .map(|(qualified_name, tool_info)| { + ( + qualified_name, + ( + vec!["mcp".to_string(), tool_info.server_name], + tool_info.tool_name, + ), + ) + }) + .collect::>(); + let mut out = Vec::new(); + for spec in router.specs() { + let tool_name = spec.name().to_string(); + if tool_name == "code_mode" { + continue; + } + + let (namespace, name) = if let Some((namespace, name)) = mcp_tool_names.get(&tool_name) { + (namespace.clone(), name.clone()) + } else { + (Vec::new(), tool_name.clone()) + }; + + out.push(EnabledTool { + tool_name, + namespace, + name, + kind: tool_kind_for_spec(&spec), + }); + } + out.sort_by(|left, right| left.tool_name.cmp(&right.tool_name)); + out.dedup_by(|left, right| left.tool_name == right.tool_name); + out +} + +async fn build_nested_router(exec: &ExecContext) -> ToolRouter { let nested_tools_config = exec.turn.tools_config.for_code_mode_nested_tools(); - let router = ToolRouter::from_config( + let mcp_tools = exec + .session + .services + .mcp_connection_manager + .read() + .await + .list_all_tools() + .await + .into_iter() + .map(|(name, tool_info)| (name, tool_info.tool)) + .collect(); + + ToolRouter::from_config( &nested_tools_config, - None, + Some(mcp_tools), None, exec.turn.dynamic_tools.as_slice(), - ); - let mut out = router - .specs() - .into_iter() - .map(|spec| EnabledTool { - name: spec.name().to_string(), - kind: tool_kind_for_spec(&spec), - }) - .filter(|tool| tool.name != "code_mode") - .collect::>(); - out.sort_by(|left, right| left.name.cmp(&right.name)); - out.dedup_by(|left, right| left.name == right.name); - out + ) } async fn call_nested_tool( @@ -290,18 +338,23 @@ async fn call_nested_tool( return JsonValue::String("code_mode cannot invoke itself".to_string()); } - let nested_config = exec.turn.tools_config.for_code_mode_nested_tools(); - let router = ToolRouter::from_config( - &nested_config, - None, - None, - exec.turn.dynamic_tools.as_slice(), - ); + let router = build_nested_router(&exec).await; let specs = router.specs(); - let payload = match build_nested_tool_payload(&specs, &tool_name, input) { - Ok(payload) => payload, - Err(error) => return JsonValue::String(error), + let payload = if let Some((server, tool)) = exec.session.parse_mcp_tool_name(&tool_name).await { + match serialize_function_tool_arguments(&tool_name, input) { + Ok(raw_arguments) => ToolPayload::Mcp { + server, + tool, + raw_arguments, + }, + Err(error) => return JsonValue::String(error), + } + } else { + match build_nested_tool_payload(&specs, &tool_name, input) { + Ok(payload) => payload, + Err(error) => return JsonValue::String(error), + } }; let call = ToolCall { @@ -357,19 +410,24 @@ fn build_function_tool_payload( tool_name: &str, input: Option, ) -> Result { - let arguments = match input { - None => "{}".to_string(), - Some(JsonValue::Object(map)) => serde_json::to_string(&JsonValue::Object(map)) - .map_err(|err| format!("failed to serialize tool `{tool_name}` arguments: {err}"))?, - Some(_) => { - return Err(format!( - "tool `{tool_name}` expects a JSON object for arguments" - )); - } - }; + let arguments = serialize_function_tool_arguments(tool_name, input)?; Ok(ToolPayload::Function { arguments }) } +fn serialize_function_tool_arguments( + tool_name: &str, + input: Option, +) -> Result { + match input { + None => Ok("{}".to_string()), + Some(JsonValue::Object(map)) => serde_json::to_string(&JsonValue::Object(map)) + .map_err(|err| format!("failed to serialize tool `{tool_name}` arguments: {err}")), + Some(_) => Err(format!( + "tool `{tool_name}` expects a JSON object for arguments" + )), + } +} + fn build_freeform_tool_payload( tool_name: &str, input: Option, diff --git a/codex-rs/core/src/tools/code_mode_bridge.js b/codex-rs/core/src/tools/code_mode_bridge.js index aca85f735..dcc9bc5bc 100644 --- a/codex-rs/core/src/tools/code_mode_bridge.js +++ b/codex-rs/core/src/tools/code_mode_bridge.js @@ -1,5 +1,5 @@ const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__; -const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.name); +const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.tool_name); const __codexContentItems = []; function __codexCloneContentItem(item) { diff --git a/codex-rs/core/src/tools/code_mode_runner.cjs b/codex-rs/core/src/tools/code_mode_runner.cjs index e2fac0817..70ba31d4c 100644 --- a/codex-rs/core/src/tools/code_mode_runner.cjs +++ b/codex-rs/core/src/tools/code_mode_runner.cjs @@ -103,13 +103,13 @@ function isValidIdentifier(name) { function createToolsNamespace(protocol, enabledTools) { const tools = Object.create(null); - for (const { name } of enabledTools) { + for (const { tool_name } of enabledTools) { const callTool = async (args) => protocol.request('tool_call', { - name: String(name), + name: String(tool_name), input: args, }); - Object.defineProperty(tools, name, { + Object.defineProperty(tools, tool_name, { value: callTool, configurable: false, enumerable: true, @@ -124,9 +124,9 @@ function createToolsModule(context, protocol, enabledTools) { const tools = createToolsNamespace(protocol, enabledTools); const exportNames = ['tools']; - for (const { name } of enabledTools) { - if (name !== 'tools' && isValidIdentifier(name)) { - exportNames.push(name); + for (const { tool_name } of enabledTools) { + if (tool_name !== 'tools' && isValidIdentifier(tool_name)) { + exportNames.push(tool_name); } } @@ -146,24 +146,108 @@ function createToolsModule(context, protocol, enabledTools) { ); } +function namespacesMatch(left, right) { + if (left.length !== right.length) { + return false; + } + return left.every((segment, index) => segment === right[index]); +} + +function createNamespacedToolsNamespace(protocol, enabledTools, namespace) { + const tools = Object.create(null); + + for (const tool of enabledTools) { + const toolNamespace = Array.isArray(tool.namespace) ? tool.namespace : []; + if (!namespacesMatch(toolNamespace, namespace)) { + continue; + } + + const callTool = async (args) => + protocol.request('tool_call', { + name: String(tool.tool_name), + input: args, + }); + Object.defineProperty(tools, tool.name, { + value: callTool, + configurable: false, + enumerable: true, + writable: false, + }); + } + + return Object.freeze(tools); +} + +function createNamespacedToolsModule(context, protocol, enabledTools, namespace) { + const tools = createNamespacedToolsNamespace(protocol, enabledTools, namespace); + const exportNames = ['tools']; + + for (const exportName of Object.keys(tools)) { + if (exportName !== 'tools' && isValidIdentifier(exportName)) { + exportNames.push(exportName); + } + } + + const uniqueExportNames = [...new Set(exportNames)]; + + return new SyntheticModule( + uniqueExportNames, + function initNamespacedToolsModule() { + this.setExport('tools', tools); + for (const exportName of uniqueExportNames) { + if (exportName !== 'tools') { + this.setExport(exportName, tools[exportName]); + } + } + }, + { context } + ); +} + +function createModuleResolver(context, protocol, enabledTools) { + const toolsModule = createToolsModule(context, protocol, enabledTools); + const namespacedModules = new Map(); + + return function resolveModule(specifier) { + if (specifier === 'tools.js') { + return toolsModule; + } + + const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier); + if (!namespacedMatch) { + throw new Error(`Unsupported import in code_mode: ${specifier}`); + } + + const namespace = namespacedMatch[1] + .split('/') + .filter((segment) => segment.length > 0); + if (namespace.length === 0) { + throw new Error(`Unsupported import in code_mode: ${specifier}`); + } + + const cacheKey = namespace.join('/'); + if (!namespacedModules.has(cacheKey)) { + namespacedModules.set( + cacheKey, + createNamespacedToolsModule(context, protocol, enabledTools, namespace) + ); + } + return namespacedModules.get(cacheKey); + }; +} + async function runModule(context, protocol, request) { - const toolsModule = createToolsModule(context, protocol, request.enabled_tools ?? []); + const resolveModule = createModuleResolver(context, protocol, request.enabled_tools ?? []); const mainModule = new SourceTextModule(request.source, { context, identifier: 'code_mode_main.mjs', importModuleDynamically(specifier) { - if (specifier === 'tools.js') { - return toolsModule; - } - throw new Error(`Unsupported import in code_mode: ${specifier}`); + return resolveModule(specifier); }, }); await mainModule.link(async (specifier) => { - if (specifier === 'tools.js') { - return toolsModule; - } - throw new Error(`Unsupported import in code_mode: ${specifier}`); + return resolveModule(specifier); }); await mainModule.evaluate(); } diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index ce6f8ea53..36a328b37 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -7,10 +7,10 @@ use crate::truncate::TruncationPolicy; use crate::truncate::formatted_truncate_text; use crate::turn_diff_tracker::TurnDiffTracker; use crate::unified_exec::resolve_max_tokens; +use codex_protocol::mcp::CallToolResult; use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; -use codex_protocol::models::McpToolOutput; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ShellToolCallParams; use codex_protocol::models::function_call_output_content_items_to_text; @@ -82,7 +82,7 @@ pub trait ToolOutput: Send { } } -impl ToolOutput for McpToolOutput { +impl ToolOutput for CallToolResult { fn log_preview(&self) -> String { let output = self.as_function_call_output_payload(); let preview = output.body.to_text().unwrap_or_else(|| output.to_string()); @@ -90,7 +90,7 @@ impl ToolOutput for McpToolOutput { } fn success_for_logging(&self) -> bool { - self.success + self.success() } fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem { @@ -99,6 +99,12 @@ impl ToolOutput for McpToolOutput { output: self.clone(), } } + + fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue { + serde_json::to_value(self).unwrap_or_else(|err| { + JsonValue::String(format!("failed to serialize mcp result: {err}")) + }) + } } pub struct FunctionToolOutput { @@ -272,12 +278,11 @@ fn response_input_to_code_mode_result(response: ResponseInputItem) -> JsonValue } }, ResponseInputItem::McpToolCallOutput { output, .. } => { - match output.as_function_call_output_payload().body { - FunctionCallOutputBody::Text(text) => JsonValue::String(text), - FunctionCallOutputBody::ContentItems(items) => { - content_items_to_code_mode_result(&items) - } - } + output.code_mode_result(&ToolPayload::Mcp { + server: String::new(), + tool: String::new(), + raw_arguments: String::new(), + }) } } } @@ -413,6 +418,48 @@ mod tests { } } + #[test] + fn mcp_code_mode_result_serializes_full_call_tool_result() { + let output = CallToolResult { + content: vec![serde_json::json!({ + "type": "text", + "text": "ignored", + })], + structured_content: Some(serde_json::json!({ + "threadId": "thread_123", + "content": "done", + })), + is_error: Some(false), + meta: Some(serde_json::json!({ + "source": "mcp", + })), + }; + + let result = output.code_mode_result(&ToolPayload::Mcp { + server: "server".to_string(), + tool: "tool".to_string(), + raw_arguments: "{}".to_string(), + }); + + assert_eq!( + result, + serde_json::json!({ + "content": [{ + "type": "text", + "text": "ignored", + }], + "structuredContent": { + "threadId": "thread_123", + "content": "done", + }, + "isError": false, + "_meta": { + "source": "mcp", + }, + }) + ); + } + #[test] fn custom_tool_calls_can_derive_text_from_content_items() { let payload = ToolPayload::Custom { diff --git a/codex-rs/core/src/tools/handlers/mcp.rs b/codex-rs/core/src/tools/handlers/mcp.rs index 14b6926e8..18e0df25c 100644 --- a/codex-rs/core/src/tools/handlers/mcp.rs +++ b/codex-rs/core/src/tools/handlers/mcp.rs @@ -7,12 +7,12 @@ use crate::tools::context::ToolInvocation; use crate::tools::context::ToolPayload; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; -use codex_protocol::models::McpToolOutput; +use codex_protocol::mcp::CallToolResult; pub struct McpHandler; #[async_trait] impl ToolHandler for McpHandler { - type Output = McpToolOutput; + type Output = CallToolResult; fn kind(&self) -> ToolKind { ToolKind::Mcp diff --git a/codex-rs/core/src/tools/js_repl/mod.rs b/codex-rs/core/src/tools/js_repl/mod.rs index 4ffb92518..d8d043a7d 100644 --- a/codex-rs/core/src/tools/js_repl/mod.rs +++ b/codex-rs/core/src/tools/js_repl/mod.rs @@ -622,7 +622,7 @@ impl JsReplManager { } ResponseInputItem::McpToolCallOutput { output, .. } => { let function_output = output.as_function_call_output_payload(); - let payload_kind = if output.success { + let payload_kind = if output.success() { JsReplToolCallPayloadKind::McpResult } else { JsReplToolCallPayloadKind::McpErrorResult @@ -634,7 +634,7 @@ impl JsReplManager { ); summary.payload_item_count = Some(output.content.len()); summary.structured_content_present = Some(output.structured_content.is_some()); - summary.result_is_error = Some(!output.success); + summary.result_is_error = Some(!output.success()); summary } } diff --git a/codex-rs/core/src/tools/parallel.rs b/codex-rs/core/src/tools/parallel.rs index e64597675..634d1ca71 100644 --- a/codex-rs/core/src/tools/parallel.rs +++ b/codex-rs/core/src/tools/parallel.rs @@ -124,9 +124,9 @@ impl ToolCallRuntime { }, ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput { call_id: call.call_id.clone(), - output: codex_protocol::models::McpToolOutput::from_error_text( - Self::abort_message(call, secs), - ), + output: codex_protocol::mcp::CallToolResult::from_error_text(Self::abort_message( + call, secs, + )), }, _ => ResponseInputItem::FunctionCallOutput { call_id: call.call_id.clone(), diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index ce2107320..a3d2ee538 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -1771,6 +1771,7 @@ pub(crate) fn mcp_tool_to_openai_tool( let rmcp::model::Tool { description, input_schema, + output_schema, .. } = tool; @@ -1795,13 +1796,19 @@ pub(crate) fn mcp_tool_to_openai_tool( // `type`, so we coerce/sanitize here for compatibility. sanitize_json_schema(&mut serialized_input_schema); let input_schema = serde_json::from_value::(serialized_input_schema)?; + let structured_content_schema = output_schema + .map(|output_schema| serde_json::Value::Object(output_schema.as_ref().clone())) + .unwrap_or_else(|| JsonValue::Object(serde_json::Map::new())); + let output_schema = Some(mcp_call_tool_result_output_schema( + structured_content_schema, + )); Ok(ResponsesApiTool { name: fully_qualified_name, description: description.map(Into::into).unwrap_or_default(), strict: false, parameters: input_schema, - output_schema: None, + output_schema, }) } @@ -1826,6 +1833,25 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result(input_schema) } +fn mcp_call_tool_result_output_schema(structured_content_schema: JsonValue) -> JsonValue { + json!({ + "type": "object", + "properties": { + "content": { + "type": "array", + "items": {} + }, + "structuredContent": structured_content_schema, + "isError": { + "type": "boolean" + }, + "_meta": {} + }, + "required": ["content"], + "additionalProperties": false + }) +} + /// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited /// JsonSchema enum. This function: /// - Ensures every schema object has a "type". If missing, infers it from @@ -2299,6 +2325,116 @@ mod tests { assert_eq!(parameters.get("properties"), Some(&serde_json::json!({}))); } + #[test] + fn mcp_tool_to_openai_tool_preserves_top_level_output_schema() { + let mut input_schema = rmcp::model::JsonObject::new(); + input_schema.insert("type".to_string(), serde_json::json!("object")); + + let mut output_schema = rmcp::model::JsonObject::new(); + output_schema.insert( + "properties".to_string(), + serde_json::json!({ + "result": { + "properties": { + "nested": {} + } + } + }), + ); + output_schema.insert("required".to_string(), serde_json::json!(["result"])); + + let tool = rmcp::model::Tool { + name: "with_output".to_string().into(), + title: None, + description: Some("Has output schema".to_string().into()), + input_schema: std::sync::Arc::new(input_schema), + output_schema: Some(std::sync::Arc::new(output_schema)), + annotations: None, + execution: None, + icons: None, + meta: None, + }; + + let openai_tool = mcp_tool_to_openai_tool("mcp__server__with_output".to_string(), tool) + .expect("convert tool"); + + assert_eq!( + openai_tool.output_schema, + Some(serde_json::json!({ + "type": "object", + "properties": { + "content": { + "type": "array", + "items": {} + }, + "structuredContent": { + "properties": { + "result": { + "properties": { + "nested": {} + } + } + }, + "required": ["result"] + }, + "isError": { + "type": "boolean" + }, + "_meta": {} + }, + "required": ["content"], + "additionalProperties": false + })) + ); + } + + #[test] + fn mcp_tool_to_openai_tool_preserves_output_schema_without_inferred_type() { + let mut input_schema = rmcp::model::JsonObject::new(); + input_schema.insert("type".to_string(), serde_json::json!("object")); + + let mut output_schema = rmcp::model::JsonObject::new(); + output_schema.insert("enum".to_string(), serde_json::json!(["ok", "error"])); + + let tool = rmcp::model::Tool { + name: "with_enum_output".to_string().into(), + title: None, + description: Some("Has enum output schema".to_string().into()), + input_schema: std::sync::Arc::new(input_schema), + output_schema: Some(std::sync::Arc::new(output_schema)), + annotations: None, + execution: None, + icons: None, + meta: None, + }; + + let openai_tool = + mcp_tool_to_openai_tool("mcp__server__with_enum_output".to_string(), tool) + .expect("convert tool"); + + assert_eq!( + openai_tool.output_schema, + Some(serde_json::json!({ + "type": "object", + "properties": { + "content": { + "type": "array", + "items": {} + }, + "structuredContent": { + "enum": ["ok", "error"] + }, + "isError": { + "type": "boolean" + }, + "_meta": {} + }, + "required": ["content"], + "additionalProperties": false + })) + ); + } + fn tool_name(tool: &ToolSpec) -> &str { match tool { ToolSpec::Function(ResponsesApiTool { name, .. }) => name, @@ -3355,7 +3491,7 @@ mod tests { }, description: "Do something cool".to_string(), strict: false, - output_schema: None, + output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))), }) ); } @@ -3594,7 +3730,7 @@ mod tests { }, description: "Search docs".to_string(), strict: false, - output_schema: None, + output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))), }) ); } @@ -3646,7 +3782,7 @@ mod tests { }, description: "Pagination".to_string(), strict: false, - output_schema: None, + output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))), }) ); } @@ -3702,7 +3838,7 @@ mod tests { }, description: "Tags".to_string(), strict: false, - output_schema: None, + output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))), }) ); } @@ -3756,7 +3892,7 @@ mod tests { }, description: "AnyOf Value".to_string(), strict: false, - output_schema: None, + output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))), }) ); } @@ -4015,7 +4151,7 @@ Examples of valid command strings: }, description: "Do something cool".to_string(), strict: false, - output_schema: None, + output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))), }) ); } diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index a77ccf5a1..658293e26 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -1,6 +1,8 @@ #![allow(clippy::expect_used, clippy::unwrap_used)] use anyhow::Result; +use codex_core::config::types::McpServerConfig; +use codex_core::config::types::McpServerTransportConfig; use codex_core::features::Feature; use core_test_support::responses; use core_test_support::responses::ResponseMock; @@ -11,11 +13,14 @@ use core_test_support::responses::ev_custom_tool_call; use core_test_support::responses::ev_response_created; use core_test_support::responses::sse; use core_test_support::skip_if_no_network; +use core_test_support::stdio_server_bin; use core_test_support::test_codex::TestCodex; use core_test_support::test_codex::test_codex; use pretty_assertions::assert_eq; use serde_json::Value; +use std::collections::HashMap; use std::fs; +use std::time::Duration; use wiremock::MockServer; fn custom_tool_output_text_and_success( @@ -63,6 +68,70 @@ async fn run_code_mode_turn( Ok((test, second_mock)) } +async fn run_code_mode_turn_with_rmcp( + server: &MockServer, + prompt: &str, + code: &str, +) -> Result<(TestCodex, ResponseMock)> { + let rmcp_test_server_bin = stdio_server_bin()?; + let mut builder = test_codex().with_config(move |config| { + let _ = config.features.enable(Feature::CodeMode); + + let mut servers = config.mcp_servers.get().clone(); + servers.insert( + "rmcp".to_string(), + McpServerConfig { + transport: McpServerTransportConfig::Stdio { + command: rmcp_test_server_bin, + args: Vec::new(), + env: Some(HashMap::from([( + "MCP_TEST_VALUE".to_string(), + "propagated-env".to_string(), + )])), + env_vars: Vec::new(), + cwd: None, + }, + enabled: true, + required: false, + disabled_reason: None, + startup_timeout_sec: Some(Duration::from_secs(10)), + tool_timeout_sec: None, + enabled_tools: None, + disabled_tools: None, + scopes: None, + oauth_resource: None, + }, + ); + config + .mcp_servers + .set(servers) + .expect("test mcp servers should accept any configuration"); + }); + let test = builder.build(server).await?; + + responses::mount_sse_once( + server, + sse(vec![ + ev_response_created("resp-1"), + ev_custom_tool_call("call-1", "code_mode", code), + ev_completed("resp-1"), + ]), + ) + .await; + + let second_mock = responses::mount_sse_once( + server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + test.submit_turn(prompt).await?; + Ok((test, second_mock)) +} + #[cfg_attr(windows, ignore = "no exec_command on Windows")] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn code_mode_can_return_exec_command_output() -> Result<()> { @@ -135,3 +204,170 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> { Ok(()) } + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let code = r#" +import { echo } from "tools/mcp/rmcp.js"; + +const { content, structuredContent, isError } = await echo({ + message: "ping", +}); +add_content( + `echo=${structuredContent?.echo ?? "missing"}\n` + + `env=${structuredContent?.env ?? "missing"}\n` + + `isError=${String(isError)}\n` + + `contentLength=${content.length}` +); +"#; + + let (_test, second_mock) = + run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode rmcp echo call failed unexpectedly: {output}" + ); + assert_eq!( + output, + "echo=ECHOING: ping +env=propagated-env +isError=false +contentLength=0" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_access_namespaced_mcp_tool_from_flat_tools_namespace() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let code = r#" +import { tools } from "tools.js"; + +const { structuredContent, isError } = await tools["mcp__rmcp__echo"]({ + message: "ping", +}); +add_content( + `echo=${structuredContent?.echo ?? "missing"}\n` + + `env=${structuredContent?.env ?? "missing"}\n` + + `isError=${String(isError)}` +); +"#; + + let (_test, second_mock) = + run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode rmcp echo call failed unexpectedly: {output}" + ); + assert_eq!( + output, + "echo=ECHOING: ping +env=propagated-env +isError=false" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let code = r#" +import { image_scenario } from "tools/mcp/rmcp.js"; + +const { content, structuredContent, isError } = await image_scenario({ + scenario: "text_only", + caption: "caption from mcp", +}); +add_content( + `firstType=${content[0]?.type ?? "missing"}\n` + + `firstText=${content[0]?.text ?? "missing"}\n` + + `structuredContent=${String(structuredContent ?? null)}\n` + + `isError=${String(isError)}` +); +"#; + + let (_test, second_mock) = run_code_mode_turn_with_rmcp( + &server, + "use code_mode to run the rmcp image scenario tool", + code, + ) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode rmcp image scenario call failed unexpectedly: {output}" + ); + assert_eq!( + output, + "firstType=text +firstText=caption from mcp +structuredContent=null +isError=false" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let code = r#" +import { echo } from "tools/mcp/rmcp.js"; + +const { content, structuredContent, isError } = await echo({}); +const firstText = content[0]?.text ?? ""; +const mentionsMissingMessage = + firstText.includes("missing field") && firstText.includes("message"); +add_content( + `isError=${String(isError)}\n` + + `contentLength=${content.length}\n` + + `mentionsMissingMessage=${String(mentionsMissingMessage)}\n` + + `structuredContent=${String(structuredContent ?? null)}` +); +"#; + + let (_test, second_mock) = + run_code_mode_turn_with_rmcp(&server, "use code_mode to call rmcp echo badly", code) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode rmcp error call failed unexpectedly: {output}" + ); + assert_eq!( + output, + "isError=true +contentLength=1 +mentionsMissingMessage=true +structuredContent=null" + ); + + Ok(()) +} diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index ca15ea951..28f6f5d60 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -206,7 +206,7 @@ pub enum ResponseInputItem { }, McpToolCallOutput { call_id: String, - output: McpToolOutput, + output: CallToolResult, }, CustomToolCallOutput { call_id: String, @@ -1184,20 +1184,10 @@ impl<'de> Deserialize<'de> for FunctionCallOutputPayload { } } -#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] -#[serde(rename_all = "camelCase")] -pub struct McpToolOutput { - pub content: Vec, - #[serde(default, skip_serializing_if = "Option::is_none")] - #[ts(optional)] - pub structured_content: Option, - pub success: bool, -} - -impl McpToolOutput { - pub fn from_result(result: Result) -> Self { +impl CallToolResult { + pub fn from_result(result: Result) -> Self { match result { - Ok(result) => Self::from(&result), + Ok(result) => result, Err(error) => Self::from_error_text(error), } } @@ -1209,23 +1199,13 @@ impl McpToolOutput { "text": text, })], structured_content: None, - success: false, + is_error: Some(true), + meta: None, } } - pub fn into_call_tool_result(self) -> CallToolResult { - let Self { - content, - structured_content, - success, - } = self; - - CallToolResult { - content, - structured_content, - is_error: Some(!success), - meta: None, - } + pub fn success(&self) -> bool { + self.is_error != Some(true) } pub fn as_function_call_output_payload(&self) -> FunctionCallOutputPayload { @@ -1236,7 +1216,7 @@ impl McpToolOutput { Ok(serialized_structured_content) => { return FunctionCallOutputPayload { body: FunctionCallOutputBody::Text(serialized_structured_content), - success: Some(self.success), + success: Some(self.success()), }; } Err(err) => { @@ -1267,7 +1247,7 @@ impl McpToolOutput { FunctionCallOutputPayload { body, - success: Some(self.success), + success: Some(self.success()), } } @@ -1276,23 +1256,6 @@ impl McpToolOutput { } } -impl From<&CallToolResult> for McpToolOutput { - fn from(call_tool_result: &CallToolResult) -> Self { - let CallToolResult { - content, - structured_content, - is_error, - meta: _, - } = call_tool_result; - - Self { - content: content.clone(), - structured_content: structured_content.clone(), - success: is_error != &Some(true), - } - } -} - fn convert_mcp_content_to_items( contents: &[serde_json::Value], ) -> Option> { @@ -1882,7 +1845,7 @@ mod tests { meta: None, }; - let payload = McpToolOutput::from(&call_tool_result).into_function_call_output_payload(); + let payload = call_tool_result.into_function_call_output_payload(); assert_eq!(payload.success, Some(true)); let Some(items) = payload.content_items() else { panic!("expected content items"); @@ -1949,7 +1912,7 @@ mod tests { meta: None, }; - let payload = McpToolOutput::from(&call_tool_result).into_function_call_output_payload(); + let payload = call_tool_result.into_function_call_output_payload(); let Some(items) = payload.content_items() else { panic!("expected content items"); };