From 07c22d20f614838dbec1bc8066ec0a23f5e90f2a Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Tue, 10 Mar 2026 16:25:27 -0700 Subject: [PATCH] Add code_mode output helpers for text and images (#14244) Summary - document how code-mode can import `output_text`/`output_image` and ensure `add_content` stays compatible - add a synthetic `@openai/code_mode` module that appends content items and validates inputs - cover the new behavior with integration tests for structured text and image outputs Testing - Not run (not requested) --- codex-rs/core/src/tools/code_mode.rs | 6 +- codex-rs/core/src/tools/code_mode_bridge.js | 4 +- codex-rs/core/src/tools/code_mode_runner.cjs | 87 ++++++++++++--- codex-rs/core/src/tools/spec.rs | 2 +- codex-rs/core/tests/suite/code_mode.rs | 107 +++++++++++++++++++ 5 files changed, 187 insertions(+), 19 deletions(-) diff --git a/codex-rs/core/src/tools/code_mode.rs b/codex-rs/core/src/tools/code_mode.rs index cc6c0af07..abe11b248 100644 --- a/codex-rs/core/src/tools/code_mode.rs +++ b/codex-rs/core/src/tools/code_mode.rs @@ -94,13 +94,13 @@ pub(crate) fn instructions(config: &Config) -> Option { section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n"); section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n"); section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/.js`; MCP tools use `tools/mcp/.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n"); - section.push_str("- Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n"); + section.push_str("- Import `{ output_text, output_image, set_max_output_tokens_per_exec_call }` from `@openai/code_mode`. `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n"); section.push_str( "- Function tools require JSON object arguments. Freeform tools require raw strings.\n", ); - section.push_str("- `add_content(value)` is synchronous. It accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n"); + section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n"); section - .push_str("- Only content passed to `add_content(value)` is surfaced back to the model."); + .push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model."); Some(section) } diff --git a/codex-rs/core/src/tools/code_mode_bridge.js b/codex-rs/core/src/tools/code_mode_bridge.js index dcc9bc5bc..362fc985b 100644 --- a/codex-rs/core/src/tools/code_mode_bridge.js +++ b/codex-rs/core/src/tools/code_mode_bridge.js @@ -1,6 +1,8 @@ const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__; const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.tool_name); -const __codexContentItems = []; +const __codexContentItems = Array.isArray(globalThis.__codexContentItems) + ? globalThis.__codexContentItems + : []; function __codexCloneContentItem(item) { if (!item || typeof item !== 'object') { diff --git a/codex-rs/core/src/tools/code_mode_runner.cjs b/codex-rs/core/src/tools/code_mode_runner.cjs index e66f1dffd..e66f9bdb7 100644 --- a/codex-rs/core/src/tools/code_mode_runner.cjs +++ b/codex-rs/core/src/tools/code_mode_runner.cjs @@ -157,6 +157,78 @@ function createToolsModule(context, callTool, enabledTools) { ); } +function ensureContentItems(context) { + if (!Array.isArray(context.__codexContentItems)) { + context.__codexContentItems = []; + } + return context.__codexContentItems; +} + +function serializeOutputText(value) { + if (typeof value === 'string') { + return value; + } + if ( + typeof value === 'undefined' || + value === null || + typeof value === 'boolean' || + typeof value === 'number' || + typeof value === 'bigint' + ) { + return String(value); + } + + const serialized = JSON.stringify(value); + if (typeof serialized === 'string') { + return serialized; + } + + return String(value); +} + +function normalizeOutputImageUrl(value) { + if (typeof value !== 'string' || !value) { + throw new TypeError('output_image expects a non-empty image URL string'); + } + if (/^(?:https?:\/\/|data:)/i.test(value)) { + return value; + } + throw new TypeError('output_image expects an http(s) or data URL'); +} + +function createCodeModeModule(context, state) { + const outputText = (value) => { + const item = { + type: 'input_text', + text: serializeOutputText(value), + }; + ensureContentItems(context).push(item); + return item; + }; + const outputImage = (value) => { + const item = { + type: 'input_image', + image_url: normalizeOutputImageUrl(value), + }; + ensureContentItems(context).push(item); + return item; + }; + + return new SyntheticModule( + ['output_text', 'output_image', 'set_max_output_tokens_per_exec_call'], + function initCodeModeModule() { + this.setExport('output_text', outputText); + this.setExport('output_image', outputImage); + this.setExport('set_max_output_tokens_per_exec_call', (value) => { + const normalized = normalizeMaxOutputTokensPerExecCall(value); + state.maxOutputTokensPerExecCall = normalized; + return normalized; + }); + }, + { context } + ); +} + function namespacesMatch(left, right) { if (left.length !== right.length) { return false; @@ -210,20 +282,6 @@ function createNamespacedToolsModule(context, callTool, enabledTools, namespace) ); } -function createCodeModeModule(context, state) { - return new SyntheticModule( - ['set_max_output_tokens_per_exec_call'], - function initCodeModeModule() { - this.setExport('set_max_output_tokens_per_exec_call', (value) => { - const normalized = normalizeMaxOutputTokensPerExecCall(value); - state.maxOutputTokensPerExecCall = normalized; - return normalized; - }); - }, - { context } - ); -} - function createModuleResolver(context, callTool, enabledTools, state) { const toolsModule = createToolsModule(context, callTool, enabledTools); const codeModeModule = createCodeModeModule(context, state); @@ -285,6 +343,7 @@ async function main() { }; const callTool = createToolCaller(protocol); const context = vm.createContext({ + __codexContentItems: [], __codex_tool_call: callTool, }); diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 3a9c9eb58..e303a22df 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -1620,7 +1620,7 @@ source: /[\s\S]+/ enabled_tool_names.join(", ") }; let description = format!( - "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}." + "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/.js`; MCP tools use `tools/mcp/.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call }}` from `\"@openai/code_mode\"`; `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}." ); ToolSpec::Freeform(FreeformTool { diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index 389c81bf4..4aca988ed 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -221,6 +221,113 @@ Total\ output\ lines:\ 1\n Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let (_test, second_mock) = run_code_mode_turn( + &server, + "use code_mode to return structured text", + r#" +import { output_text } from "@openai/code_mode"; + +output_text({ json: true }); +"#, + false, + ) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode call failed unexpectedly: {output}" + ); + assert_eq!(output, r#"{"json":true}"#); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_surfaces_output_text_stringify_errors() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let (_test, second_mock) = run_code_mode_turn( + &server, + "use code_mode to return circular text", + r#" +import { output_text } from "@openai/code_mode"; + +const circular = {}; +circular.self = circular; +output_text(circular); +"#, + false, + ) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(true), + "circular stringify unexpectedly succeeded" + ); + assert!(output.contains("code_mode execution failed")); + assert!(output.contains("Converting circular structure to JSON")); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_output_images_via_openai_code_mode_module() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let (_test, second_mock) = run_code_mode_turn( + &server, + "use code_mode to return images", + r#" +import { output_image } from "@openai/code_mode"; + +output_image("https://example.com/image.jpg"); +output_image("data:image/png;base64,AAA"); +"#, + false, + ) + .await?; + + let req = second_mock.single_request(); + let (_, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode image output failed unexpectedly" + ); + assert_eq!( + req.custom_tool_call_output("call-1"), + serde_json::json!({ + "type": "custom_tool_call_output", + "call_id": "call-1", + "output": [ + { + "type": "input_image", + "image_url": "https://example.com/image.jpg" + }, + { + "type": "input_image", + "image_url": "data:image/png;base64,AAA" + } + ] + }) + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> { skip_if_no_network!(Ok(()));