diff --git a/codex-rs/core/src/tools/code_mode.rs b/codex-rs/core/src/tools/code_mode.rs index d9a42ead4..cc6c0af07 100644 --- a/codex-rs/core/src/tools/code_mode.rs +++ b/codex-rs/core/src/tools/code_mode.rs @@ -14,6 +14,10 @@ use crate::tools::context::ToolPayload; use crate::tools::js_repl::resolve_compatible_node; use crate::tools::router::ToolCall; use crate::tools::router::ToolCallSource; +use crate::truncate::TruncationPolicy; +use crate::truncate::formatted_truncate_text_content_items_with_policy; +use crate::truncate::truncate_function_output_items_with_policy; +use crate::unified_exec::resolve_max_tokens; use codex_protocol::models::FunctionCallOutputContentItem; use serde::Deserialize; use serde::Serialize; @@ -72,6 +76,8 @@ enum NodeToHostMessage { }, Result { content_items: Vec<serde_json::Value>, + #[serde(default)] + max_output_tokens_per_exec_call: Option<u64>, }, } @@ -88,6 +94,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> { section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n"); section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n"); section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n"); + section.push_str("- Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. 
When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n"); section.push_str( "- Function tools require JSON object arguments. Freeform tools require raw strings.\n", ); @@ -187,8 +194,14 @@ async fn execute_node( }; write_message(&mut stdin, &response).await?; } - NodeToHostMessage::Result { content_items } => { - final_content_items = Some(output_content_items_from_json_values(content_items)?); + NodeToHostMessage::Result { + content_items, + max_output_tokens_per_exec_call, + } => { + final_content_items = Some(truncate_code_mode_result( + output_content_items_from_json_values(content_items)?, + max_output_tokens_per_exec_call, + )); break; } } @@ -261,6 +274,32 @@ fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String> { +fn truncate_code_mode_result( + items: Vec<FunctionCallOutputContentItem>, + max_output_tokens_per_exec_call: Option<u64>, +) -> Vec<FunctionCallOutputContentItem> { + let max_output_tokens = resolve_max_tokens(max_output_tokens_per_exec_call); + if items + .iter() + .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. 
})) + { + let (mut truncated_items, original_token_count) = + formatted_truncate_text_content_items_with_policy( + &items, + TruncationPolicy::Tokens(max_output_tokens), + ); + if let Some(original_token_count) = original_token_count + && let Some(FunctionCallOutputContentItem::InputText { text }) = + truncated_items.first_mut() + { + *text = format!("Original token count: {original_token_count}\nOutput:\n{text}"); + } + return truncated_items; + } + + truncate_function_output_items_with_policy(&items, TruncationPolicy::Tokens(max_output_tokens)) +} + async fn build_enabled_tools(exec: &ExecContext) -> Vec { let router = build_nested_router(exec).await; let mcp_tool_names = exec diff --git a/codex-rs/core/src/tools/code_mode_runner.cjs b/codex-rs/core/src/tools/code_mode_runner.cjs index 70ba31d4c..e66f1dffd 100644 --- a/codex-rs/core/src/tools/code_mode_runner.cjs +++ b/codex-rs/core/src/tools/code_mode_runner.cjs @@ -4,6 +4,14 @@ const readline = require('node:readline'); const vm = require('node:vm'); const { SourceTextModule, SyntheticModule } = vm; +const DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL = 10000; + +function normalizeMaxOutputTokensPerExecCall(value) { + if (!Number.isSafeInteger(value) || value < 0) { + throw new TypeError('max_output_tokens_per_exec_call must be a non-negative safe integer'); + } + return value; +} function createProtocol() { const rl = readline.createInterface({ @@ -100,17 +108,20 @@ function isValidIdentifier(name) { return /^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name); } -function createToolsNamespace(protocol, enabledTools) { +function createToolCaller(protocol) { + return (name, input) => + protocol.request('tool_call', { + name: String(name), + input, + }); +} + +function createToolsNamespace(callTool, enabledTools) { const tools = Object.create(null); for (const { tool_name } of enabledTools) { - const callTool = async (args) => - protocol.request('tool_call', { - name: String(tool_name), - input: args, - }); 
Object.defineProperty(tools, tool_name, { - value: callTool, + value: async (args) => callTool(tool_name, args), configurable: false, enumerable: true, writable: false, @@ -120,8 +131,8 @@ function createToolsNamespace(protocol, enabledTools) { return Object.freeze(tools); } -function createToolsModule(context, protocol, enabledTools) { - const tools = createToolsNamespace(protocol, enabledTools); +function createToolsModule(context, callTool, enabledTools) { + const tools = createToolsNamespace(callTool, enabledTools); const exportNames = ['tools']; for (const { tool_name } of enabledTools) { @@ -153,7 +164,7 @@ function namespacesMatch(left, right) { return left.every((segment, index) => segment === right[index]); } -function createNamespacedToolsNamespace(protocol, enabledTools, namespace) { +function createNamespacedToolsNamespace(callTool, enabledTools, namespace) { const tools = Object.create(null); for (const tool of enabledTools) { @@ -162,13 +173,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) { continue; } - const callTool = async (args) => - protocol.request('tool_call', { - name: String(tool.tool_name), - input: args, - }); Object.defineProperty(tools, tool.name, { - value: callTool, + value: async (args) => callTool(tool.tool_name, args), configurable: false, enumerable: true, writable: false, @@ -178,8 +184,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) { return Object.freeze(tools); } -function createNamespacedToolsModule(context, protocol, enabledTools, namespace) { - const tools = createNamespacedToolsNamespace(protocol, enabledTools, namespace); +function createNamespacedToolsModule(context, callTool, enabledTools, namespace) { + const tools = createNamespacedToolsNamespace(callTool, enabledTools, namespace); const exportNames = ['tools']; for (const exportName of Object.keys(tools)) { @@ -204,14 +210,32 @@ function createNamespacedToolsModule(context, protocol, enabledTools, 
namespace) ); } -function createModuleResolver(context, protocol, enabledTools) { - const toolsModule = createToolsModule(context, protocol, enabledTools); +function createCodeModeModule(context, state) { + return new SyntheticModule( + ['set_max_output_tokens_per_exec_call'], + function initCodeModeModule() { + this.setExport('set_max_output_tokens_per_exec_call', (value) => { + const normalized = normalizeMaxOutputTokensPerExecCall(value); + state.maxOutputTokensPerExecCall = normalized; + return normalized; + }); + }, + { context } + ); +} + +function createModuleResolver(context, callTool, enabledTools, state) { + const toolsModule = createToolsModule(context, callTool, enabledTools); + const codeModeModule = createCodeModeModule(context, state); const namespacedModules = new Map(); return function resolveModule(specifier) { if (specifier === 'tools.js') { return toolsModule; } + if (specifier === '@openai/code_mode') { + return codeModeModule; + } const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier); if (!namespacedMatch) { @@ -229,45 +253,47 @@ function createModuleResolver(context, protocol, enabledTools) { if (!namespacedModules.has(cacheKey)) { namespacedModules.set( cacheKey, - createNamespacedToolsModule(context, protocol, enabledTools, namespace) + createNamespacedToolsModule(context, callTool, enabledTools, namespace) ); } return namespacedModules.get(cacheKey); }; } -async function runModule(context, protocol, request) { - const resolveModule = createModuleResolver(context, protocol, request.enabled_tools ?? []); +async function runModule(context, protocol, request, state, callTool) { + const resolveModule = createModuleResolver( + context, + callTool, + request.enabled_tools ?? 
[], + state + ); const mainModule = new SourceTextModule(request.source, { context, identifier: 'code_mode_main.mjs', - importModuleDynamically(specifier) { - return resolveModule(specifier); - }, + importModuleDynamically: async (specifier) => resolveModule(specifier), }); - await mainModule.link(async (specifier) => { - return resolveModule(specifier); - }); + await mainModule.link(resolveModule); await mainModule.evaluate(); } async function main() { const protocol = createProtocol(); const request = await protocol.init; + const state = { + maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL, + }; + const callTool = createToolCaller(protocol); const context = vm.createContext({ - __codex_tool_call: async (name, input) => - protocol.request('tool_call', { - name: String(name), - input, - }), + __codex_tool_call: callTool, }); try { - await runModule(context, protocol, request); + await runModule(context, protocol, request, state, callTool); await protocol.send({ type: 'result', content_items: readContentItems(context), + max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall, }); process.exit(0); } catch (error) { @@ -275,6 +301,7 @@ async function main() { await protocol.send({ type: 'result', content_items: readContentItems(context), + max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall, }); process.exit(1); } diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index a3d2ee538..5d681f64b 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -1621,7 +1621,7 @@ source: /[\s\S]+/ enabled_tool_names.join(", ") }; let description = format!( - "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. 
Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}." + "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. 
Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}." ); ToolSpec::Freeform(FreeformTool { diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index fb275e6d4..927d7c938 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -94,6 +94,51 @@ pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String { } } } + +pub(crate) fn formatted_truncate_text_content_items_with_policy( + items: &[FunctionCallOutputContentItem], + policy: TruncationPolicy, +) -> (Vec<FunctionCallOutputContentItem>, Option<u64>) { + let text_segments = items + .iter() + .filter_map(|item| match item { + FunctionCallOutputContentItem::InputText { text } => Some(text.as_str()), + FunctionCallOutputContentItem::InputImage { .. } => None, + }) + .collect::<Vec<_>>(); + + if text_segments.is_empty() { + return (items.to_vec(), None); + } + + let mut combined = String::new(); + for text in &text_segments { + if !combined.is_empty() { + combined.push('\n'); + } + combined.push_str(text); + } + + if combined.len() <= policy.byte_budget() { + return (items.to_vec(), None); + } + + let mut out = vec![FunctionCallOutputContentItem::InputText { + text: formatted_truncate_text(&combined, policy), + }]; + out.extend(items.iter().filter_map(|item| match item { + FunctionCallOutputContentItem::InputImage { image_url, detail } => { + Some(FunctionCallOutputContentItem::InputImage { + image_url: image_url.clone(), + detail: *detail, + }) + } + FunctionCallOutputContentItem::InputText { .. } => None, + })); + + (out, Some(approx_token_count(&combined))) +} + /// Globally truncate function output items to fit within the given /// truncation policy's budget, preserving as many text/image items as /// possible and appending a summary for any omitted text items. 
@@ -319,6 +364,7 @@ mod tests { use super::TruncationPolicy; use super::approx_token_count; use super::formatted_truncate_text; + use super::formatted_truncate_text_content_items_with_policy; use super::split_string; use super::truncate_function_output_items_with_policy; use super::truncate_text; @@ -540,4 +586,92 @@ mod tests { }; assert!(summary_text.contains("omitted 2 text items")); } + + #[test] + fn formatted_truncate_text_content_items_with_policy_returns_original_under_limit() { + let items = vec![ + FunctionCallOutputContentItem::InputText { + text: "alpha".to_string(), + }, + FunctionCallOutputContentItem::InputText { + text: String::new(), + }, + FunctionCallOutputContentItem::InputText { + text: "beta".to_string(), + }, + ]; + + let (output, original_token_count) = + formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Bytes(32)); + + assert_eq!(output, items); + assert_eq!(original_token_count, None); + } + + #[test] + fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_images() { + let items = vec![ + FunctionCallOutputContentItem::InputText { + text: "abcd".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "img:one".to_string(), + detail: None, + }, + FunctionCallOutputContentItem::InputText { + text: "efgh".to_string(), + }, + FunctionCallOutputContentItem::InputText { + text: "ijkl".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "img:two".to_string(), + detail: None, + }, + ]; + + let (output, original_token_count) = + formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Bytes(8)); + + assert_eq!( + output, + vec![ + FunctionCallOutputContentItem::InputText { + text: "Total output lines: 3\n\nabcd…6 chars truncated…ijkl".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "img:one".to_string(), + detail: None, + }, + FunctionCallOutputContentItem::InputImage { + image_url: "img:two".to_string(), + 
detail: None, + }, + ] + ); + assert_eq!(original_token_count, Some(4)); + } + + #[test] + fn formatted_truncate_text_content_items_with_policy_merges_all_text_for_token_budget() { + let items = vec![ + FunctionCallOutputContentItem::InputText { + text: "abcdefgh".to_string(), + }, + FunctionCallOutputContentItem::InputText { + text: "ijklmnop".to_string(), + }, + ]; + + let (output, original_token_count) = + formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Tokens(2)); + + assert_eq!( + output, + vec![FunctionCallOutputContentItem::InputText { + text: "Total output lines: 2\n\nabcd…3 tokens truncated…mnop".to_string(), + }] + ); + assert_eq!(original_token_count, Some(5)); + } } diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index 658293e26..389c81bf4 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -4,6 +4,7 @@ use anyhow::Result; use codex_core::config::types::McpServerConfig; use codex_core::config::types::McpServerTransportConfig; use codex_core::features::Feature; +use core_test_support::assert_regex_match; use core_test_support::responses; use core_test_support::responses::ResponseMock; use core_test_support::responses::ResponsesRequest; @@ -175,6 +176,51 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark Ok(()) } +#[cfg_attr(windows, ignore = "no exec_command on Windows")] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let (_test, second_mock) = run_code_mode_turn( + &server, + "use code_mode to truncate the final result", + r#" +import { exec_command } from "tools.js"; +import { set_max_output_tokens_per_exec_call } from "@openai/code_mode"; + +set_max_output_tokens_per_exec_call(6); + 
+add_content(JSON.stringify(await exec_command({ + cmd: "printf 'token one token two token three token four token five token six token seven'", + max_output_tokens: 100 +}))); +"#, + false, + ) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode call failed unexpectedly: {output}" + ); + let expected_pattern = r#"(?sx) +\A +Original\ token\ count:\ \d+\n +Output:\n +Total\ output\ lines:\ 1\n +\n +\{"chunk_id".*…\d+\ tokens\ truncated….* +\z +"#; + assert_regex_match(expected_pattern, &output); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> { skip_if_no_network!(Ok(()));