Add output schema to MCP tools and expose MCP tool results in code mode (#14236)

Summary
- drop `McpToolOutput` in favor of `CallToolResult`, moving its helpers
to keep MCP tooling focused on the final result shape
- wire the new schema definitions through code mode, context, handlers,
and spec modules so MCP tools serialize the exact output shape expected
by the model
- extend code mode tests to cover multiple MCP call scenarios and ensure
the serialized data matches the new schema
- refresh JS runner helpers and protocol models alongside the schema
changes

Testing
- Not run (not requested)
This commit is contained in:
pakrym-oai 2026-03-10 15:25:19 -07:00 committed by Michael Bolin
parent d5694529ca
commit ee8f84153e
12 changed files with 659 additions and 137 deletions

View file

@ -58,7 +58,6 @@ use codex_app_server_protocol::AppInfo;
use codex_otel::TelemetryAuthMode;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::McpToolOutput;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::openai_models::ModelsResponse;
@ -1628,7 +1627,7 @@ fn prefers_structured_content_when_present() {
meta: None,
};
let got = McpToolOutput::from(&ctr).into_function_call_output_payload();
let got = ctr.into_function_call_output_payload();
let expected = FunctionCallOutputPayload {
body: FunctionCallOutputBody::Text(
serde_json::to_string(&json!({
@ -1710,7 +1709,7 @@ fn falls_back_to_content_when_structured_is_null() {
meta: None,
};
let got = McpToolOutput::from(&ctr).into_function_call_output_payload();
let got = ctr.into_function_call_output_payload();
let expected = FunctionCallOutputPayload {
body: FunctionCallOutputBody::Text(
serde_json::to_string(&vec![text_block("hello"), text_block("world")]).unwrap(),
@ -1730,7 +1729,7 @@ fn success_flag_reflects_is_error_true() {
meta: None,
};
let got = McpToolOutput::from(&ctr).into_function_call_output_payload();
let got = ctr.into_function_call_output_payload();
let expected = FunctionCallOutputPayload {
body: FunctionCallOutputBody::Text(
serde_json::to_string(&json!({ "message": "bad" })).unwrap(),
@ -1750,7 +1749,7 @@ fn success_flag_true_with_no_error_and_content_used() {
meta: None,
};
let got = McpToolOutput::from(&ctr).into_function_call_output_payload();
let got = ctr.into_function_call_output_payload();
let expected = FunctionCallOutputPayload {
body: FunctionCallOutputBody::Text(
serde_json::to_string(&vec![text_block("alpha")]).unwrap(),

View file

@ -32,7 +32,6 @@ use crate::protocol::McpToolCallBeginEvent;
use crate::protocol::McpToolCallEndEvent;
use crate::state_db;
use codex_protocol::mcp::CallToolResult;
use codex_protocol::models::McpToolOutput;
use codex_protocol::openai_models::InputModality;
use codex_protocol::protocol::AskForApproval;
use codex_protocol::protocol::ReviewDecision;
@ -59,7 +58,7 @@ pub(crate) async fn handle_mcp_tool_call(
server: String,
tool_name: String,
arguments: String,
) -> McpToolOutput {
) -> CallToolResult {
// Parse the `arguments` as JSON. An empty string is OK, but invalid JSON
// is not.
let arguments_value = if arguments.trim().is_empty() {
@ -69,7 +68,7 @@ pub(crate) async fn handle_mcp_tool_call(
Ok(value) => Some(value),
Err(e) => {
error!("failed to parse tool call arguments: {e}");
return McpToolOutput::from_error_text(format!("err: {e}"));
return CallToolResult::from_error_text(format!("err: {e}"));
}
}
};
@ -113,7 +112,7 @@ pub(crate) async fn handle_mcp_tool_call(
turn_context
.session_telemetry
.counter("codex.mcp.call", 1, &[("status", status)]);
return McpToolOutput::from_result(result);
return CallToolResult::from_result(result);
}
if let Some(decision) = maybe_request_mcp_tool_approval(
@ -217,7 +216,7 @@ pub(crate) async fn handle_mcp_tool_call(
.session_telemetry
.counter("codex.mcp.call", 1, &[("status", status)]);
return McpToolOutput::from_result(result);
return CallToolResult::from_result(result);
}
let tool_call_begin_event = EventMsg::McpToolCallBegin(McpToolCallBeginEvent {
@ -263,7 +262,7 @@ pub(crate) async fn handle_mcp_tool_call(
.session_telemetry
.counter("codex.mcp.call", 1, &[("status", status)]);
McpToolOutput::from_result(result)
CallToolResult::from_result(result)
}
async fn maybe_mark_thread_memory_mode_polluted(sess: &Session, turn_context: &TurnContext) {

View file

@ -42,6 +42,8 @@ enum CodeModeToolKind {
#[derive(Clone, Debug, Serialize)]
struct EnabledTool {
tool_name: String,
namespace: Vec<String>,
name: String,
kind: CodeModeToolKind,
}
@ -85,7 +87,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
section.push_str("- `code_mode` is a freeform/custom tool. Direct `code_mode` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n");
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
@ -106,7 +108,7 @@ pub(crate) async fn execute(
turn,
tracker,
};
let enabled_tools = build_enabled_tools(&exec);
let enabled_tools = build_enabled_tools(&exec).await;
let source = build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
execute_node(exec, source, enabled_tools)
.await
@ -259,26 +261,72 @@ fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String
.replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
}
fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
let router = build_nested_router(exec).await;
let mcp_tool_names = exec
.session
.services
.mcp_connection_manager
.read()
.await
.list_all_tools()
.await
.into_iter()
.map(|(qualified_name, tool_info)| {
(
qualified_name,
(
vec!["mcp".to_string(), tool_info.server_name],
tool_info.tool_name,
),
)
})
.collect::<std::collections::HashMap<_, _>>();
let mut out = Vec::new();
for spec in router.specs() {
let tool_name = spec.name().to_string();
if tool_name == "code_mode" {
continue;
}
let (namespace, name) = if let Some((namespace, name)) = mcp_tool_names.get(&tool_name) {
(namespace.clone(), name.clone())
} else {
(Vec::new(), tool_name.clone())
};
out.push(EnabledTool {
tool_name,
namespace,
name,
kind: tool_kind_for_spec(&spec),
});
}
out.sort_by(|left, right| left.tool_name.cmp(&right.tool_name));
out.dedup_by(|left, right| left.tool_name == right.tool_name);
out
}
async fn build_nested_router(exec: &ExecContext) -> ToolRouter {
let nested_tools_config = exec.turn.tools_config.for_code_mode_nested_tools();
let router = ToolRouter::from_config(
let mcp_tools = exec
.session
.services
.mcp_connection_manager
.read()
.await
.list_all_tools()
.await
.into_iter()
.map(|(name, tool_info)| (name, tool_info.tool))
.collect();
ToolRouter::from_config(
&nested_tools_config,
None,
Some(mcp_tools),
None,
exec.turn.dynamic_tools.as_slice(),
);
let mut out = router
.specs()
.into_iter()
.map(|spec| EnabledTool {
name: spec.name().to_string(),
kind: tool_kind_for_spec(&spec),
})
.filter(|tool| tool.name != "code_mode")
.collect::<Vec<_>>();
out.sort_by(|left, right| left.name.cmp(&right.name));
out.dedup_by(|left, right| left.name == right.name);
out
)
}
async fn call_nested_tool(
@ -290,18 +338,23 @@ async fn call_nested_tool(
return JsonValue::String("code_mode cannot invoke itself".to_string());
}
let nested_config = exec.turn.tools_config.for_code_mode_nested_tools();
let router = ToolRouter::from_config(
&nested_config,
None,
None,
exec.turn.dynamic_tools.as_slice(),
);
let router = build_nested_router(&exec).await;
let specs = router.specs();
let payload = match build_nested_tool_payload(&specs, &tool_name, input) {
Ok(payload) => payload,
Err(error) => return JsonValue::String(error),
let payload = if let Some((server, tool)) = exec.session.parse_mcp_tool_name(&tool_name).await {
match serialize_function_tool_arguments(&tool_name, input) {
Ok(raw_arguments) => ToolPayload::Mcp {
server,
tool,
raw_arguments,
},
Err(error) => return JsonValue::String(error),
}
} else {
match build_nested_tool_payload(&specs, &tool_name, input) {
Ok(payload) => payload,
Err(error) => return JsonValue::String(error),
}
};
let call = ToolCall {
@ -357,19 +410,24 @@ fn build_function_tool_payload(
tool_name: &str,
input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
let arguments = match input {
None => "{}".to_string(),
Some(JsonValue::Object(map)) => serde_json::to_string(&JsonValue::Object(map))
.map_err(|err| format!("failed to serialize tool `{tool_name}` arguments: {err}"))?,
Some(_) => {
return Err(format!(
"tool `{tool_name}` expects a JSON object for arguments"
));
}
};
let arguments = serialize_function_tool_arguments(tool_name, input)?;
Ok(ToolPayload::Function { arguments })
}
fn serialize_function_tool_arguments(
tool_name: &str,
input: Option<JsonValue>,
) -> Result<String, String> {
match input {
None => Ok("{}".to_string()),
Some(JsonValue::Object(map)) => serde_json::to_string(&JsonValue::Object(map))
.map_err(|err| format!("failed to serialize tool `{tool_name}` arguments: {err}")),
Some(_) => Err(format!(
"tool `{tool_name}` expects a JSON object for arguments"
)),
}
}
fn build_freeform_tool_payload(
tool_name: &str,
input: Option<JsonValue>,

View file

@ -1,5 +1,5 @@
const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__;
const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.name);
const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.tool_name);
const __codexContentItems = [];
function __codexCloneContentItem(item) {

View file

@ -103,13 +103,13 @@ function isValidIdentifier(name) {
function createToolsNamespace(protocol, enabledTools) {
const tools = Object.create(null);
for (const { name } of enabledTools) {
for (const { tool_name } of enabledTools) {
const callTool = async (args) =>
protocol.request('tool_call', {
name: String(name),
name: String(tool_name),
input: args,
});
Object.defineProperty(tools, name, {
Object.defineProperty(tools, tool_name, {
value: callTool,
configurable: false,
enumerable: true,
@ -124,9 +124,9 @@ function createToolsModule(context, protocol, enabledTools) {
const tools = createToolsNamespace(protocol, enabledTools);
const exportNames = ['tools'];
for (const { name } of enabledTools) {
if (name !== 'tools' && isValidIdentifier(name)) {
exportNames.push(name);
for (const { tool_name } of enabledTools) {
if (tool_name !== 'tools' && isValidIdentifier(tool_name)) {
exportNames.push(tool_name);
}
}
@ -146,24 +146,108 @@ function createToolsModule(context, protocol, enabledTools) {
);
}
function namespacesMatch(left, right) {
if (left.length !== right.length) {
return false;
}
return left.every((segment, index) => segment === right[index]);
}
function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
const tools = Object.create(null);
for (const tool of enabledTools) {
const toolNamespace = Array.isArray(tool.namespace) ? tool.namespace : [];
if (!namespacesMatch(toolNamespace, namespace)) {
continue;
}
const callTool = async (args) =>
protocol.request('tool_call', {
name: String(tool.tool_name),
input: args,
});
Object.defineProperty(tools, tool.name, {
value: callTool,
configurable: false,
enumerable: true,
writable: false,
});
}
return Object.freeze(tools);
}
function createNamespacedToolsModule(context, protocol, enabledTools, namespace) {
const tools = createNamespacedToolsNamespace(protocol, enabledTools, namespace);
const exportNames = ['tools'];
for (const exportName of Object.keys(tools)) {
if (exportName !== 'tools' && isValidIdentifier(exportName)) {
exportNames.push(exportName);
}
}
const uniqueExportNames = [...new Set(exportNames)];
return new SyntheticModule(
uniqueExportNames,
function initNamespacedToolsModule() {
this.setExport('tools', tools);
for (const exportName of uniqueExportNames) {
if (exportName !== 'tools') {
this.setExport(exportName, tools[exportName]);
}
}
},
{ context }
);
}
function createModuleResolver(context, protocol, enabledTools) {
const toolsModule = createToolsModule(context, protocol, enabledTools);
const namespacedModules = new Map();
return function resolveModule(specifier) {
if (specifier === 'tools.js') {
return toolsModule;
}
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
if (!namespacedMatch) {
throw new Error(`Unsupported import in code_mode: ${specifier}`);
}
const namespace = namespacedMatch[1]
.split('/')
.filter((segment) => segment.length > 0);
if (namespace.length === 0) {
throw new Error(`Unsupported import in code_mode: ${specifier}`);
}
const cacheKey = namespace.join('/');
if (!namespacedModules.has(cacheKey)) {
namespacedModules.set(
cacheKey,
createNamespacedToolsModule(context, protocol, enabledTools, namespace)
);
}
return namespacedModules.get(cacheKey);
};
}
async function runModule(context, protocol, request) {
const toolsModule = createToolsModule(context, protocol, request.enabled_tools ?? []);
const resolveModule = createModuleResolver(context, protocol, request.enabled_tools ?? []);
const mainModule = new SourceTextModule(request.source, {
context,
identifier: 'code_mode_main.mjs',
importModuleDynamically(specifier) {
if (specifier === 'tools.js') {
return toolsModule;
}
throw new Error(`Unsupported import in code_mode: ${specifier}`);
return resolveModule(specifier);
},
});
await mainModule.link(async (specifier) => {
if (specifier === 'tools.js') {
return toolsModule;
}
throw new Error(`Unsupported import in code_mode: ${specifier}`);
return resolveModule(specifier);
});
await mainModule.evaluate();
}

View file

@ -7,10 +7,10 @@ use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text;
use crate::turn_diff_tracker::TurnDiffTracker;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::mcp::CallToolResult;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::models::FunctionCallOutputPayload;
use codex_protocol::models::McpToolOutput;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::ShellToolCallParams;
use codex_protocol::models::function_call_output_content_items_to_text;
@ -82,7 +82,7 @@ pub trait ToolOutput: Send {
}
}
impl ToolOutput for McpToolOutput {
impl ToolOutput for CallToolResult {
fn log_preview(&self) -> String {
let output = self.as_function_call_output_payload();
let preview = output.body.to_text().unwrap_or_else(|| output.to_string());
@ -90,7 +90,7 @@ impl ToolOutput for McpToolOutput {
}
fn success_for_logging(&self) -> bool {
self.success
self.success()
}
fn to_response_item(&self, call_id: &str, _payload: &ToolPayload) -> ResponseInputItem {
@ -99,6 +99,12 @@ impl ToolOutput for McpToolOutput {
output: self.clone(),
}
}
fn code_mode_result(&self, _payload: &ToolPayload) -> JsonValue {
serde_json::to_value(self).unwrap_or_else(|err| {
JsonValue::String(format!("failed to serialize mcp result: {err}"))
})
}
}
pub struct FunctionToolOutput {
@ -272,12 +278,11 @@ fn response_input_to_code_mode_result(response: ResponseInputItem) -> JsonValue
}
},
ResponseInputItem::McpToolCallOutput { output, .. } => {
match output.as_function_call_output_payload().body {
FunctionCallOutputBody::Text(text) => JsonValue::String(text),
FunctionCallOutputBody::ContentItems(items) => {
content_items_to_code_mode_result(&items)
}
}
output.code_mode_result(&ToolPayload::Mcp {
server: String::new(),
tool: String::new(),
raw_arguments: String::new(),
})
}
}
}
@ -413,6 +418,48 @@ mod tests {
}
}
#[test]
fn mcp_code_mode_result_serializes_full_call_tool_result() {
let output = CallToolResult {
content: vec![serde_json::json!({
"type": "text",
"text": "ignored",
})],
structured_content: Some(serde_json::json!({
"threadId": "thread_123",
"content": "done",
})),
is_error: Some(false),
meta: Some(serde_json::json!({
"source": "mcp",
})),
};
let result = output.code_mode_result(&ToolPayload::Mcp {
server: "server".to_string(),
tool: "tool".to_string(),
raw_arguments: "{}".to_string(),
});
assert_eq!(
result,
serde_json::json!({
"content": [{
"type": "text",
"text": "ignored",
}],
"structuredContent": {
"threadId": "thread_123",
"content": "done",
},
"isError": false,
"_meta": {
"source": "mcp",
},
})
);
}
#[test]
fn custom_tool_calls_can_derive_text_from_content_items() {
let payload = ToolPayload::Custom {

View file

@ -7,12 +7,12 @@ use crate::tools::context::ToolInvocation;
use crate::tools::context::ToolPayload;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::models::McpToolOutput;
use codex_protocol::mcp::CallToolResult;
pub struct McpHandler;
#[async_trait]
impl ToolHandler for McpHandler {
type Output = McpToolOutput;
type Output = CallToolResult;
fn kind(&self) -> ToolKind {
ToolKind::Mcp

View file

@ -622,7 +622,7 @@ impl JsReplManager {
}
ResponseInputItem::McpToolCallOutput { output, .. } => {
let function_output = output.as_function_call_output_payload();
let payload_kind = if output.success {
let payload_kind = if output.success() {
JsReplToolCallPayloadKind::McpResult
} else {
JsReplToolCallPayloadKind::McpErrorResult
@ -634,7 +634,7 @@ impl JsReplManager {
);
summary.payload_item_count = Some(output.content.len());
summary.structured_content_present = Some(output.structured_content.is_some());
summary.result_is_error = Some(!output.success);
summary.result_is_error = Some(!output.success());
summary
}
}

View file

@ -124,9 +124,9 @@ impl ToolCallRuntime {
},
ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput {
call_id: call.call_id.clone(),
output: codex_protocol::models::McpToolOutput::from_error_text(
Self::abort_message(call, secs),
),
output: codex_protocol::mcp::CallToolResult::from_error_text(Self::abort_message(
call, secs,
)),
},
_ => ResponseInputItem::FunctionCallOutput {
call_id: call.call_id.clone(),

View file

@ -1771,6 +1771,7 @@ pub(crate) fn mcp_tool_to_openai_tool(
let rmcp::model::Tool {
description,
input_schema,
output_schema,
..
} = tool;
@ -1795,13 +1796,19 @@ pub(crate) fn mcp_tool_to_openai_tool(
// `type`, so we coerce/sanitize here for compatibility.
sanitize_json_schema(&mut serialized_input_schema);
let input_schema = serde_json::from_value::<JsonSchema>(serialized_input_schema)?;
let structured_content_schema = output_schema
.map(|output_schema| serde_json::Value::Object(output_schema.as_ref().clone()))
.unwrap_or_else(|| JsonValue::Object(serde_json::Map::new()));
let output_schema = Some(mcp_call_tool_result_output_schema(
structured_content_schema,
));
Ok(ResponsesApiTool {
name: fully_qualified_name,
description: description.map(Into::into).unwrap_or_default(),
strict: false,
parameters: input_schema,
output_schema: None,
output_schema,
})
}
@ -1826,6 +1833,25 @@ pub fn parse_tool_input_schema(input_schema: &JsonValue) -> Result<JsonSchema, s
serde_json::from_value::<JsonSchema>(input_schema)
}
fn mcp_call_tool_result_output_schema(structured_content_schema: JsonValue) -> JsonValue {
json!({
"type": "object",
"properties": {
"content": {
"type": "array",
"items": {}
},
"structuredContent": structured_content_schema,
"isError": {
"type": "boolean"
},
"_meta": {}
},
"required": ["content"],
"additionalProperties": false
})
}
/// Sanitize a JSON Schema (as serde_json::Value) so it can fit our limited
/// JsonSchema enum. This function:
/// - Ensures every schema object has a "type". If missing, infers it from
@ -2299,6 +2325,116 @@ mod tests {
assert_eq!(parameters.get("properties"), Some(&serde_json::json!({})));
}
#[test]
fn mcp_tool_to_openai_tool_preserves_top_level_output_schema() {
let mut input_schema = rmcp::model::JsonObject::new();
input_schema.insert("type".to_string(), serde_json::json!("object"));
let mut output_schema = rmcp::model::JsonObject::new();
output_schema.insert(
"properties".to_string(),
serde_json::json!({
"result": {
"properties": {
"nested": {}
}
}
}),
);
output_schema.insert("required".to_string(), serde_json::json!(["result"]));
let tool = rmcp::model::Tool {
name: "with_output".to_string().into(),
title: None,
description: Some("Has output schema".to_string().into()),
input_schema: std::sync::Arc::new(input_schema),
output_schema: Some(std::sync::Arc::new(output_schema)),
annotations: None,
execution: None,
icons: None,
meta: None,
};
let openai_tool = mcp_tool_to_openai_tool("mcp__server__with_output".to_string(), tool)
.expect("convert tool");
assert_eq!(
openai_tool.output_schema,
Some(serde_json::json!({
"type": "object",
"properties": {
"content": {
"type": "array",
"items": {}
},
"structuredContent": {
"properties": {
"result": {
"properties": {
"nested": {}
}
}
},
"required": ["result"]
},
"isError": {
"type": "boolean"
},
"_meta": {}
},
"required": ["content"],
"additionalProperties": false
}))
);
}
#[test]
fn mcp_tool_to_openai_tool_preserves_output_schema_without_inferred_type() {
let mut input_schema = rmcp::model::JsonObject::new();
input_schema.insert("type".to_string(), serde_json::json!("object"));
let mut output_schema = rmcp::model::JsonObject::new();
output_schema.insert("enum".to_string(), serde_json::json!(["ok", "error"]));
let tool = rmcp::model::Tool {
name: "with_enum_output".to_string().into(),
title: None,
description: Some("Has enum output schema".to_string().into()),
input_schema: std::sync::Arc::new(input_schema),
output_schema: Some(std::sync::Arc::new(output_schema)),
annotations: None,
execution: None,
icons: None,
meta: None,
};
let openai_tool =
mcp_tool_to_openai_tool("mcp__server__with_enum_output".to_string(), tool)
.expect("convert tool");
assert_eq!(
openai_tool.output_schema,
Some(serde_json::json!({
"type": "object",
"properties": {
"content": {
"type": "array",
"items": {}
},
"structuredContent": {
"enum": ["ok", "error"]
},
"isError": {
"type": "boolean"
},
"_meta": {}
},
"required": ["content"],
"additionalProperties": false
}))
);
}
fn tool_name(tool: &ToolSpec) -> &str {
match tool {
ToolSpec::Function(ResponsesApiTool { name, .. }) => name,
@ -3355,7 +3491,7 @@ mod tests {
},
description: "Do something cool".to_string(),
strict: false,
output_schema: None,
output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))),
})
);
}
@ -3594,7 +3730,7 @@ mod tests {
},
description: "Search docs".to_string(),
strict: false,
output_schema: None,
output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))),
})
);
}
@ -3646,7 +3782,7 @@ mod tests {
},
description: "Pagination".to_string(),
strict: false,
output_schema: None,
output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))),
})
);
}
@ -3702,7 +3838,7 @@ mod tests {
},
description: "Tags".to_string(),
strict: false,
output_schema: None,
output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))),
})
);
}
@ -3756,7 +3892,7 @@ mod tests {
},
description: "AnyOf Value".to_string(),
strict: false,
output_schema: None,
output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))),
})
);
}
@ -4015,7 +4151,7 @@ Examples of valid command strings:
},
description: "Do something cool".to_string(),
strict: false,
output_schema: None,
output_schema: Some(mcp_call_tool_result_output_schema(serde_json::json!({}))),
})
);
}

View file

@ -1,6 +1,8 @@
#![allow(clippy::expect_used, clippy::unwrap_used)]
use anyhow::Result;
use codex_core::config::types::McpServerConfig;
use codex_core::config::types::McpServerTransportConfig;
use codex_core::features::Feature;
use core_test_support::responses;
use core_test_support::responses::ResponseMock;
@ -11,11 +13,14 @@ use core_test_support::responses::ev_custom_tool_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::stdio_server_bin;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use pretty_assertions::assert_eq;
use serde_json::Value;
use std::collections::HashMap;
use std::fs;
use std::time::Duration;
use wiremock::MockServer;
fn custom_tool_output_text_and_success(
@ -63,6 +68,70 @@ async fn run_code_mode_turn(
Ok((test, second_mock))
}
async fn run_code_mode_turn_with_rmcp(
server: &MockServer,
prompt: &str,
code: &str,
) -> Result<(TestCodex, ResponseMock)> {
let rmcp_test_server_bin = stdio_server_bin()?;
let mut builder = test_codex().with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
let mut servers = config.mcp_servers.get().clone();
servers.insert(
"rmcp".to_string(),
McpServerConfig {
transport: McpServerTransportConfig::Stdio {
command: rmcp_test_server_bin,
args: Vec::new(),
env: Some(HashMap::from([(
"MCP_TEST_VALUE".to_string(),
"propagated-env".to_string(),
)])),
env_vars: Vec::new(),
cwd: None,
},
enabled: true,
required: false,
disabled_reason: None,
startup_timeout_sec: Some(Duration::from_secs(10)),
tool_timeout_sec: None,
enabled_tools: None,
disabled_tools: None,
scopes: None,
oauth_resource: None,
},
);
config
.mcp_servers
.set(servers)
.expect("test mcp servers should accept any configuration");
});
let test = builder.build(server).await?;
responses::mount_sse_once(
server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "code_mode", code),
ev_completed("resp-1"),
]),
)
.await;
let second_mock = responses::mount_sse_once(
server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
test.submit_turn(prompt).await?;
Ok((test, second_mock))
}
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_return_exec_command_output() -> Result<()> {
@ -135,3 +204,170 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
import { echo } from "tools/mcp/rmcp.js";
const { content, structuredContent, isError } = await echo({
message: "ping",
});
add_content(
`echo=${structuredContent?.echo ?? "missing"}\n` +
`env=${structuredContent?.env ?? "missing"}\n` +
`isError=${String(isError)}\n` +
`contentLength=${content.length}`
);
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode rmcp echo call failed unexpectedly: {output}"
);
assert_eq!(
output,
"echo=ECHOING: ping
env=propagated-env
isError=false
contentLength=0"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_access_namespaced_mcp_tool_from_flat_tools_namespace() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
import { tools } from "tools.js";
const { structuredContent, isError } = await tools["mcp__rmcp__echo"]({
message: "ping",
});
add_content(
`echo=${structuredContent?.echo ?? "missing"}\n` +
`env=${structuredContent?.env ?? "missing"}\n` +
`isError=${String(isError)}`
);
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode rmcp echo call failed unexpectedly: {output}"
);
assert_eq!(
output,
"echo=ECHOING: ping
env=propagated-env
isError=false"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
import { image_scenario } from "tools/mcp/rmcp.js";
const { content, structuredContent, isError } = await image_scenario({
scenario: "text_only",
caption: "caption from mcp",
});
add_content(
`firstType=${content[0]?.type ?? "missing"}\n` +
`firstText=${content[0]?.text ?? "missing"}\n` +
`structuredContent=${String(structuredContent ?? null)}\n` +
`isError=${String(isError)}`
);
"#;
let (_test, second_mock) = run_code_mode_turn_with_rmcp(
&server,
"use code_mode to run the rmcp image scenario tool",
code,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode rmcp image scenario call failed unexpectedly: {output}"
);
assert_eq!(
output,
"firstType=text
firstText=caption from mcp
structuredContent=null
isError=false"
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
import { echo } from "tools/mcp/rmcp.js";
const { content, structuredContent, isError } = await echo({});
const firstText = content[0]?.text ?? "";
const mentionsMissingMessage =
firstText.includes("missing field") && firstText.includes("message");
add_content(
`isError=${String(isError)}\n` +
`contentLength=${content.length}\n` +
`mentionsMissingMessage=${String(mentionsMissingMessage)}\n` +
`structuredContent=${String(structuredContent ?? null)}`
);
"#;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use code_mode to call rmcp echo badly", code)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"code_mode rmcp error call failed unexpectedly: {output}"
);
assert_eq!(
output,
"isError=true
contentLength=1
mentionsMissingMessage=true
structuredContent=null"
);
Ok(())
}

View file

@ -206,7 +206,7 @@ pub enum ResponseInputItem {
},
McpToolCallOutput {
call_id: String,
output: McpToolOutput,
output: CallToolResult,
},
CustomToolCallOutput {
call_id: String,
@ -1184,20 +1184,10 @@ impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
}
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "camelCase")]
pub struct McpToolOutput {
pub content: Vec<serde_json::Value>,
#[serde(default, skip_serializing_if = "Option::is_none")]
#[ts(optional)]
pub structured_content: Option<serde_json::Value>,
pub success: bool,
}
impl McpToolOutput {
pub fn from_result(result: Result<CallToolResult, String>) -> Self {
impl CallToolResult {
pub fn from_result(result: Result<Self, String>) -> Self {
match result {
Ok(result) => Self::from(&result),
Ok(result) => result,
Err(error) => Self::from_error_text(error),
}
}
@ -1209,23 +1199,13 @@ impl McpToolOutput {
"text": text,
})],
structured_content: None,
success: false,
is_error: Some(true),
meta: None,
}
}
pub fn into_call_tool_result(self) -> CallToolResult {
let Self {
content,
structured_content,
success,
} = self;
CallToolResult {
content,
structured_content,
is_error: Some(!success),
meta: None,
}
pub fn success(&self) -> bool {
self.is_error != Some(true)
}
pub fn as_function_call_output_payload(&self) -> FunctionCallOutputPayload {
@ -1236,7 +1216,7 @@ impl McpToolOutput {
Ok(serialized_structured_content) => {
return FunctionCallOutputPayload {
body: FunctionCallOutputBody::Text(serialized_structured_content),
success: Some(self.success),
success: Some(self.success()),
};
}
Err(err) => {
@ -1267,7 +1247,7 @@ impl McpToolOutput {
FunctionCallOutputPayload {
body,
success: Some(self.success),
success: Some(self.success()),
}
}
@ -1276,23 +1256,6 @@ impl McpToolOutput {
}
}
impl From<&CallToolResult> for McpToolOutput {
fn from(call_tool_result: &CallToolResult) -> Self {
let CallToolResult {
content,
structured_content,
is_error,
meta: _,
} = call_tool_result;
Self {
content: content.clone(),
structured_content: structured_content.clone(),
success: is_error != &Some(true),
}
}
}
fn convert_mcp_content_to_items(
contents: &[serde_json::Value],
) -> Option<Vec<FunctionCallOutputContentItem>> {
@ -1882,7 +1845,7 @@ mod tests {
meta: None,
};
let payload = McpToolOutput::from(&call_tool_result).into_function_call_output_payload();
let payload = call_tool_result.into_function_call_output_payload();
assert_eq!(payload.success, Some(true));
let Some(items) = payload.content_items() else {
panic!("expected content items");
@ -1949,7 +1912,7 @@ mod tests {
meta: None,
};
let payload = McpToolOutput::from(&call_tool_result).into_function_call_output_payload();
let payload = call_tool_result.into_function_call_output_payload();
let Some(items) = payload.content_items() else {
panic!("expected content items");
};