Add model-controlled truncation for code mode results (#14258)

Summary
- document that `@openai/code_mode` exposes
`set_max_output_tokens_per_exec_call` and that `code_mode` truncates the
final Rust-side output when the budget is exceeded
- enforce the configured budget in the Rust tool runner, reusing
truncation helpers so text-only outputs follow the unified-exec wrapper
and mixed outputs still fit within the limit
- ensure the new behavior is covered by a code-mode integration test and
string spec update

Testing
- Not run (not requested)
This commit is contained in:
pakrym-oai 2026-03-10 15:57:14 -07:00 committed by Michael Bolin
parent ee8f84153e
commit 3d41ff0b77
5 changed files with 284 additions and 38 deletions

View file

@ -14,6 +14,10 @@ use crate::tools::context::ToolPayload;
use crate::tools::js_repl::resolve_compatible_node;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text_content_items_with_policy;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::models::FunctionCallOutputContentItem;
use serde::Deserialize;
use serde::Serialize;
@ -72,6 +76,8 @@ enum NodeToHostMessage {
},
Result {
content_items: Vec<JsonValue>,
#[serde(default)]
max_output_tokens_per_exec_call: Option<usize>,
},
}
@ -88,6 +94,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
@ -187,8 +194,14 @@ async fn execute_node(
};
write_message(&mut stdin, &response).await?;
}
NodeToHostMessage::Result { content_items } => {
final_content_items = Some(output_content_items_from_json_values(content_items)?);
NodeToHostMessage::Result {
content_items,
max_output_tokens_per_exec_call,
} => {
final_content_items = Some(truncate_code_mode_result(
output_content_items_from_json_values(content_items)?,
max_output_tokens_per_exec_call,
));
break;
}
}
@ -261,6 +274,32 @@ fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String
.replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
}
/// Truncates the final `code_mode` result to the model-configured token budget.
///
/// Text-only results are merged and truncated with the unified-exec style
/// `Original token count:` / `Output:` wrapper; results that mix text and
/// images fall back to the generic per-item truncation helper.
fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens_per_exec_call: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
    let budget = resolve_max_tokens(max_output_tokens_per_exec_call);
    let policy = TruncationPolicy::Tokens(budget);

    let text_only = items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }));
    if !text_only {
        // Mixed text/image output: delegate to the generic item-level helper.
        return truncate_function_output_items_with_policy(&items, policy);
    }

    let (mut truncated, original_tokens) =
        formatted_truncate_text_content_items_with_policy(&items, policy);
    // A reported token count means truncation happened; prepend the
    // unified-exec wrapper to the merged text item so the model can see the
    // pre-truncation size.
    if let Some(original_tokens) = original_tokens {
        if let Some(FunctionCallOutputContentItem::InputText { text }) = truncated.first_mut() {
            *text = format!("Original token count: {original_tokens}\nOutput:\n{text}");
        }
    }
    truncated
}
async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
let router = build_nested_router(exec).await;
let mcp_tool_names = exec

View file

@ -4,6 +4,14 @@ const readline = require('node:readline');
const vm = require('node:vm');
const { SourceTextModule, SyntheticModule } = vm;
const DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL = 10000;
/**
 * Validates a model-provided token budget for the final exec-call output.
 *
 * @param {unknown} value candidate budget supplied from the vm context
 * @returns {number} the validated budget, unchanged
 * @throws {TypeError} when value is not a non-negative safe integer
 */
function normalizeMaxOutputTokensPerExecCall(value) {
  const isValid = Number.isSafeInteger(value) && value >= 0;
  if (isValid) {
    return value;
  }
  throw new TypeError('max_output_tokens_per_exec_call must be a non-negative safe integer');
}
function createProtocol() {
const rl = readline.createInterface({
@ -100,17 +108,20 @@ function isValidIdentifier(name) {
return /^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name);
}
function createToolsNamespace(protocol, enabledTools) {
/**
 * Builds the single entry point used for every nested tool invocation.
 *
 * @param {{request: Function}} protocol host protocol handle
 * @returns {(name: unknown, input: unknown) => unknown} caller that forwards a
 *   `tool_call` request with the tool name coerced to a string
 */
function createToolCaller(protocol) {
  return function callTool(name, input) {
    const payload = { name: String(name), input };
    return protocol.request('tool_call', payload);
  };
}
function createToolsNamespace(callTool, enabledTools) {
const tools = Object.create(null);
for (const { tool_name } of enabledTools) {
const callTool = async (args) =>
protocol.request('tool_call', {
name: String(tool_name),
input: args,
});
Object.defineProperty(tools, tool_name, {
value: callTool,
value: async (args) => callTool(tool_name, args),
configurable: false,
enumerable: true,
writable: false,
@ -120,8 +131,8 @@ function createToolsNamespace(protocol, enabledTools) {
return Object.freeze(tools);
}
function createToolsModule(context, protocol, enabledTools) {
const tools = createToolsNamespace(protocol, enabledTools);
function createToolsModule(context, callTool, enabledTools) {
const tools = createToolsNamespace(callTool, enabledTools);
const exportNames = ['tools'];
for (const { tool_name } of enabledTools) {
@ -153,7 +164,7 @@ function namespacesMatch(left, right) {
return left.every((segment, index) => segment === right[index]);
}
function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
function createNamespacedToolsNamespace(callTool, enabledTools, namespace) {
const tools = Object.create(null);
for (const tool of enabledTools) {
@ -162,13 +173,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
continue;
}
const callTool = async (args) =>
protocol.request('tool_call', {
name: String(tool.tool_name),
input: args,
});
Object.defineProperty(tools, tool.name, {
value: callTool,
value: async (args) => callTool(tool.tool_name, args),
configurable: false,
enumerable: true,
writable: false,
@ -178,8 +184,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
return Object.freeze(tools);
}
function createNamespacedToolsModule(context, protocol, enabledTools, namespace) {
const tools = createNamespacedToolsNamespace(protocol, enabledTools, namespace);
function createNamespacedToolsModule(context, callTool, enabledTools, namespace) {
const tools = createNamespacedToolsNamespace(callTool, enabledTools, namespace);
const exportNames = ['tools'];
for (const exportName of Object.keys(tools)) {
@ -204,14 +210,32 @@ function createNamespacedToolsModule(context, protocol, enabledTools, namespace)
);
}
function createModuleResolver(context, protocol, enabledTools) {
const toolsModule = createToolsModule(context, protocol, enabledTools);
/**
 * Creates the synthetic `@openai/code_mode` module.
 *
 * The module's only export, `set_max_output_tokens_per_exec_call`, validates
 * the supplied budget, stores it on the shared run state, and echoes it back.
 *
 * @param {object} context vm context the module is bound to
 * @param {{maxOutputTokensPerExecCall: number}} state mutable per-run state
 * @returns {SyntheticModule} module exposing the budget setter
 */
function createCodeModeModule(context, state) {
  const exportNames = ['set_max_output_tokens_per_exec_call'];
  const setBudget = (value) => {
    const normalized = normalizeMaxOutputTokensPerExecCall(value);
    state.maxOutputTokensPerExecCall = normalized;
    return normalized;
  };
  return new SyntheticModule(
    exportNames,
    function initCodeModeModule() {
      this.setExport('set_max_output_tokens_per_exec_call', setBudget);
    },
    { context }
  );
}
function createModuleResolver(context, callTool, enabledTools, state) {
const toolsModule = createToolsModule(context, callTool, enabledTools);
const codeModeModule = createCodeModeModule(context, state);
const namespacedModules = new Map();
return function resolveModule(specifier) {
if (specifier === 'tools.js') {
return toolsModule;
}
if (specifier === '@openai/code_mode') {
return codeModeModule;
}
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
if (!namespacedMatch) {
@ -229,45 +253,47 @@ function createModuleResolver(context, protocol, enabledTools) {
if (!namespacedModules.has(cacheKey)) {
namespacedModules.set(
cacheKey,
createNamespacedToolsModule(context, protocol, enabledTools, namespace)
createNamespacedToolsModule(context, callTool, enabledTools, namespace)
);
}
return namespacedModules.get(cacheKey);
};
}
async function runModule(context, protocol, request) {
const resolveModule = createModuleResolver(context, protocol, request.enabled_tools ?? []);
async function runModule(context, protocol, request, state, callTool) {
const resolveModule = createModuleResolver(
context,
callTool,
request.enabled_tools ?? [],
state
);
const mainModule = new SourceTextModule(request.source, {
context,
identifier: 'code_mode_main.mjs',
importModuleDynamically(specifier) {
return resolveModule(specifier);
},
importModuleDynamically: async (specifier) => resolveModule(specifier),
});
await mainModule.link(async (specifier) => {
return resolveModule(specifier);
});
await mainModule.link(resolveModule);
await mainModule.evaluate();
}
async function main() {
const protocol = createProtocol();
const request = await protocol.init;
const state = {
maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
};
const callTool = createToolCaller(protocol);
const context = vm.createContext({
__codex_tool_call: async (name, input) =>
protocol.request('tool_call', {
name: String(name),
input,
}),
__codex_tool_call: callTool,
});
try {
await runModule(context, protocol, request);
await runModule(context, protocol, request, state, callTool);
await protocol.send({
type: 'result',
content_items: readContentItems(context),
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(0);
} catch (error) {
@ -275,6 +301,7 @@ async function main() {
await protocol.send({
type: 'result',
content_items: readContentItems(context),
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(1);
}

View file

@ -1621,7 +1621,7 @@ source: /[\s\S]+/
enabled_tool_names.join(", ")
};
let description = format!(
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
);
ToolSpec::Freeform(FreeformTool {

View file

@ -94,6 +94,51 @@ pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
}
}
}
/// Globally truncates the text portion of a content-item list.
///
/// All `InputText` items are merged (newline-separated) and, when the merged
/// text exceeds the policy's budget, replaced with a single truncated text
/// item followed by the original images in order. Returns the new items plus
/// `Some(approximate token count of the merged text)` when truncation
/// happened, or the items unchanged and `None` otherwise.
pub(crate) fn formatted_truncate_text_content_items_with_policy(
    items: &[FunctionCallOutputContentItem],
    policy: TruncationPolicy,
) -> (Vec<FunctionCallOutputContentItem>, Option<usize>) {
    // Collect every text segment in document order; images pass through.
    let mut text_segments: Vec<&str> = Vec::new();
    for item in items {
        if let FunctionCallOutputContentItem::InputText { text } = item {
            text_segments.push(text.as_str());
        }
    }
    if text_segments.is_empty() {
        return (items.to_vec(), None);
    }

    // Merge with '\n' separators, but only once something has been written —
    // a leading empty segment must not introduce a separator.
    let mut combined = String::new();
    for segment in &text_segments {
        if !combined.is_empty() {
            combined.push('\n');
        }
        combined.push_str(segment);
    }
    if combined.len() <= policy.byte_budget() {
        // Under budget: hand the items back untouched.
        return (items.to_vec(), None);
    }

    // One truncated text item first, then the surviving images in order.
    let mut out = Vec::with_capacity(1 + items.len());
    out.push(FunctionCallOutputContentItem::InputText {
        text: formatted_truncate_text(&combined, policy),
    });
    for item in items {
        if let FunctionCallOutputContentItem::InputImage { image_url, detail } = item {
            out.push(FunctionCallOutputContentItem::InputImage {
                image_url: image_url.clone(),
                detail: *detail,
            });
        }
    }
    (out, Some(approx_token_count(&combined)))
}
/// Globally truncate function output items to fit within the given
/// truncation policy's budget, preserving as many text/image items as
/// possible and appending a summary for any omitted text items.
@ -319,6 +364,7 @@ mod tests {
use super::TruncationPolicy;
use super::approx_token_count;
use super::formatted_truncate_text;
use super::formatted_truncate_text_content_items_with_policy;
use super::split_string;
use super::truncate_function_output_items_with_policy;
use super::truncate_text;
@ -540,4 +586,92 @@ mod tests {
};
assert!(summary_text.contains("omitted 2 text items"));
}
#[test]
fn formatted_truncate_text_content_items_with_policy_returns_original_under_limit() {
    // Three text items (including an empty one) whose combined size fits
    // comfortably inside the byte budget.
    let items: Vec<_> = ["alpha", "", "beta"]
        .into_iter()
        .map(|text| FunctionCallOutputContentItem::InputText {
            text: text.to_string(),
        })
        .collect();

    let (output, original_token_count) =
        formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Bytes(32));

    // Under the limit the items come back untouched and no count is reported.
    assert_eq!(output, items);
    assert_eq!(original_token_count, None);
}
#[test]
fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_images() {
    let text = |s: &str| FunctionCallOutputContentItem::InputText {
        text: s.to_string(),
    };
    let image = |url: &str| FunctionCallOutputContentItem::InputImage {
        image_url: url.to_string(),
        detail: None,
    };
    // Interleaved text and images; the combined text exceeds the byte budget.
    let items = vec![
        text("abcd"),
        image("img:one"),
        text("efgh"),
        text("ijkl"),
        image("img:two"),
    ];

    let (output, original_token_count) =
        formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Bytes(8));

    // All text collapses into one truncated item; images trail in order.
    assert_eq!(
        output,
        vec![
            text("Total output lines: 3\n\nabcd…6 chars truncated…ijkl"),
            image("img:one"),
            image("img:two"),
        ]
    );
    assert_eq!(original_token_count, Some(4));
}
#[test]
fn formatted_truncate_text_content_items_with_policy_merges_all_text_for_token_budget() {
    // Two text items whose merged size blows a two-token budget.
    let items: Vec<_> = ["abcdefgh", "ijklmnop"]
        .into_iter()
        .map(|s| FunctionCallOutputContentItem::InputText {
            text: s.to_string(),
        })
        .collect();

    let (output, original_token_count) =
        formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Tokens(2));

    // Both items merge into a single truncated text item with the marker.
    assert_eq!(
        output,
        vec![FunctionCallOutputContentItem::InputText {
            text: "Total output lines: 2\n\nabcd…3 tokens truncated…mnop".to_string(),
        }]
    );
    assert_eq!(original_token_count, Some(5));
}
}

View file

@ -4,6 +4,7 @@ use anyhow::Result;
use codex_core::config::types::McpServerConfig;
use codex_core::config::types::McpServerTransportConfig;
use codex_core::features::Feature;
use core_test_support::assert_regex_match;
use core_test_support::responses;
use core_test_support::responses::ResponseMock;
use core_test_support::responses::ResponsesRequest;
@ -175,6 +176,51 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark
Ok(())
}
// Verifies model-controlled truncation of the final `code_mode` result: the
// script lowers the budget via `set_max_output_tokens_per_exec_call`, then
// produces an exec output that must come back truncated with the unified-exec
// style `Original token count:` / `Output:` wrapper.
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
// Run one code_mode turn; the JS below sets a 6-token budget and emits a
// JSON-stringified exec result far larger than that budget.
let (_test, second_mock) = run_code_mode_turn(
&server,
"use code_mode to truncate the final result",
r#"
import { exec_command } from "tools.js";
import { set_max_output_tokens_per_exec_call } from "@openai/code_mode";
set_max_output_tokens_per_exec_call(6);
add_content(JSON.stringify(await exec_command({
cmd: "printf 'token one token two token three token four token five token six token seven'",
max_output_tokens: 100
})));
"#,
false,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
// The tool call itself must succeed; only the payload gets truncated.
assert_ne!(
success,
Some(false),
"code_mode call failed unexpectedly: {output}"
);
// Expect the wrapper header, the merged-text line count, the start of the
// JSON payload, and a `…N tokens truncated…` marker somewhere after it.
let expected_pattern = r#"(?sx)
\A
Original\ token\ count:\ \d+\n
Output:\n
Total\ output\ lines:\ 1\n
\n
\{"chunk_id".*\d+\ tokens\ truncated.*
\z
"#;
assert_regex_match(expected_pattern, &output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
skip_if_no_network!(Ok(()));