Add model-controlled truncation for code mode results (#14258)

Summary
- document that `@openai/code_mode` exposes
`set_max_output_tokens_per_exec_call` and that `code_mode` truncates the
final Rust-side output when the budget is exceeded
- enforce the configured budget in the Rust tool runner, reusing
truncation helpers so text-only outputs follow the unified-exec wrapper
and mixed outputs still fit within the limit
- ensure the new behavior is covered by a code-mode integration test and
string spec update

Testing
- Not run (not requested)
This commit is contained in:
pakrym-oai 2026-03-10 15:57:14 -07:00 committed by Michael Bolin
parent ee8f84153e
commit 3d41ff0b77
5 changed files with 284 additions and 38 deletions

View file

@ -14,6 +14,10 @@ use crate::tools::context::ToolPayload;
use crate::tools::js_repl::resolve_compatible_node;
use crate::tools::router::ToolCall;
use crate::tools::router::ToolCallSource;
use crate::truncate::TruncationPolicy;
use crate::truncate::formatted_truncate_text_content_items_with_policy;
use crate::truncate::truncate_function_output_items_with_policy;
use crate::unified_exec::resolve_max_tokens;
use codex_protocol::models::FunctionCallOutputContentItem;
use serde::Deserialize;
use serde::Serialize;
@ -72,6 +76,8 @@ enum NodeToHostMessage {
},
Result {
content_items: Vec<JsonValue>,
#[serde(default)]
max_output_tokens_per_exec_call: Option<usize>,
},
}
@ -88,6 +94,7 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
section.push_str("- Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
@ -187,8 +194,14 @@ async fn execute_node(
};
write_message(&mut stdin, &response).await?;
}
NodeToHostMessage::Result { content_items } => {
final_content_items = Some(output_content_items_from_json_values(content_items)?);
NodeToHostMessage::Result {
content_items,
max_output_tokens_per_exec_call,
} => {
final_content_items = Some(truncate_code_mode_result(
output_content_items_from_json_values(content_items)?,
max_output_tokens_per_exec_call,
));
break;
}
}
@ -261,6 +274,32 @@ fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result<String
.replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code))
}
/// Truncates the final `code_mode` result to the model-configured token budget.
///
/// Text-only results are merged and truncated with the unified-exec style
/// `Original token count:` / `Output:` wrapper; results that mix text and
/// images fall back to the generic per-item truncation helper.
fn truncate_code_mode_result(
    items: Vec<FunctionCallOutputContentItem>,
    max_output_tokens_per_exec_call: Option<usize>,
) -> Vec<FunctionCallOutputContentItem> {
    let budget = resolve_max_tokens(max_output_tokens_per_exec_call);
    let policy = TruncationPolicy::Tokens(budget);

    let text_only = items
        .iter()
        .all(|item| matches!(item, FunctionCallOutputContentItem::InputText { .. }));
    if !text_only {
        // Mixed text/image output: delegate to the generic item-level helper.
        return truncate_function_output_items_with_policy(&items, policy);
    }

    let (mut truncated, original_tokens) =
        formatted_truncate_text_content_items_with_policy(&items, policy);
    // A reported token count means truncation happened; prepend the
    // unified-exec wrapper to the merged text item so the model can see the
    // pre-truncation size.
    if let Some(original_tokens) = original_tokens {
        if let Some(FunctionCallOutputContentItem::InputText { text }) = truncated.first_mut() {
            *text = format!("Original token count: {original_tokens}\nOutput:\n{text}");
        }
    }
    truncated
}
async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
let router = build_nested_router(exec).await;
let mcp_tool_names = exec

View file

@ -4,6 +4,14 @@ const readline = require('node:readline');
const vm = require('node:vm');
const { SourceTextModule, SyntheticModule } = vm;
const DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL = 10000;
/**
 * Validates a model-provided token budget for the final exec-call output.
 *
 * @param {unknown} value candidate budget supplied from the vm context
 * @returns {number} the validated budget, unchanged
 * @throws {TypeError} when value is not a non-negative safe integer
 */
function normalizeMaxOutputTokensPerExecCall(value) {
  const isValid = Number.isSafeInteger(value) && value >= 0;
  if (isValid) {
    return value;
  }
  throw new TypeError('max_output_tokens_per_exec_call must be a non-negative safe integer');
}
function createProtocol() {
const rl = readline.createInterface({
@ -100,17 +108,20 @@ function isValidIdentifier(name) {
return /^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name);
}
function createToolsNamespace(protocol, enabledTools) {
/**
 * Builds the single entry point used for every nested tool invocation.
 *
 * @param {{request: Function}} protocol host protocol handle
 * @returns {(name: unknown, input: unknown) => unknown} caller that forwards a
 *   `tool_call` request with the tool name coerced to a string
 */
function createToolCaller(protocol) {
  return function callTool(name, input) {
    const payload = { name: String(name), input };
    return protocol.request('tool_call', payload);
  };
}
function createToolsNamespace(callTool, enabledTools) {
const tools = Object.create(null);
for (const { tool_name } of enabledTools) {
const callTool = async (args) =>
protocol.request('tool_call', {
name: String(tool_name),
input: args,
});
Object.defineProperty(tools, tool_name, {
value: callTool,
value: async (args) => callTool(tool_name, args),
configurable: false,
enumerable: true,
writable: false,
@ -120,8 +131,8 @@ function createToolsNamespace(protocol, enabledTools) {
return Object.freeze(tools);
}
function createToolsModule(context, protocol, enabledTools) {
const tools = createToolsNamespace(protocol, enabledTools);
function createToolsModule(context, callTool, enabledTools) {
const tools = createToolsNamespace(callTool, enabledTools);
const exportNames = ['tools'];
for (const { tool_name } of enabledTools) {
@ -153,7 +164,7 @@ function namespacesMatch(left, right) {
return left.every((segment, index) => segment === right[index]);
}
function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
function createNamespacedToolsNamespace(callTool, enabledTools, namespace) {
const tools = Object.create(null);
for (const tool of enabledTools) {
@ -162,13 +173,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
continue;
}
const callTool = async (args) =>
protocol.request('tool_call', {
name: String(tool.tool_name),
input: args,
});
Object.defineProperty(tools, tool.name, {
value: callTool,
value: async (args) => callTool(tool.tool_name, args),
configurable: false,
enumerable: true,
writable: false,
@ -178,8 +184,8 @@ function createNamespacedToolsNamespace(protocol, enabledTools, namespace) {
return Object.freeze(tools);
}
function createNamespacedToolsModule(context, protocol, enabledTools, namespace) {
const tools = createNamespacedToolsNamespace(protocol, enabledTools, namespace);
function createNamespacedToolsModule(context, callTool, enabledTools, namespace) {
const tools = createNamespacedToolsNamespace(callTool, enabledTools, namespace);
const exportNames = ['tools'];
for (const exportName of Object.keys(tools)) {
@ -204,14 +210,32 @@ function createNamespacedToolsModule(context, protocol, enabledTools, namespace)
);
}
function createModuleResolver(context, protocol, enabledTools) {
const toolsModule = createToolsModule(context, protocol, enabledTools);
/**
 * Creates the synthetic `@openai/code_mode` module.
 *
 * The module's only export, `set_max_output_tokens_per_exec_call`, validates
 * the supplied budget, stores it on the shared run state, and echoes it back.
 *
 * @param {object} context vm context the module is bound to
 * @param {{maxOutputTokensPerExecCall: number}} state mutable per-run state
 * @returns {SyntheticModule} module exposing the budget setter
 */
function createCodeModeModule(context, state) {
  const exportNames = ['set_max_output_tokens_per_exec_call'];
  const setBudget = (value) => {
    const normalized = normalizeMaxOutputTokensPerExecCall(value);
    state.maxOutputTokensPerExecCall = normalized;
    return normalized;
  };
  return new SyntheticModule(
    exportNames,
    function initCodeModeModule() {
      this.setExport('set_max_output_tokens_per_exec_call', setBudget);
    },
    { context }
  );
}
function createModuleResolver(context, callTool, enabledTools, state) {
const toolsModule = createToolsModule(context, callTool, enabledTools);
const codeModeModule = createCodeModeModule(context, state);
const namespacedModules = new Map();
return function resolveModule(specifier) {
if (specifier === 'tools.js') {
return toolsModule;
}
if (specifier === '@openai/code_mode') {
return codeModeModule;
}
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
if (!namespacedMatch) {
@ -229,45 +253,47 @@ function createModuleResolver(context, protocol, enabledTools) {
if (!namespacedModules.has(cacheKey)) {
namespacedModules.set(
cacheKey,
createNamespacedToolsModule(context, protocol, enabledTools, namespace)
createNamespacedToolsModule(context, callTool, enabledTools, namespace)
);
}
return namespacedModules.get(cacheKey);
};
}
async function runModule(context, protocol, request) {
const resolveModule = createModuleResolver(context, protocol, request.enabled_tools ?? []);
async function runModule(context, protocol, request, state, callTool) {
const resolveModule = createModuleResolver(
context,
callTool,
request.enabled_tools ?? [],
state
);
const mainModule = new SourceTextModule(request.source, {
context,
identifier: 'code_mode_main.mjs',
importModuleDynamically(specifier) {
return resolveModule(specifier);
},
importModuleDynamically: async (specifier) => resolveModule(specifier),
});
await mainModule.link(async (specifier) => {
return resolveModule(specifier);
});
await mainModule.link(resolveModule);
await mainModule.evaluate();
}
async function main() {
const protocol = createProtocol();
const request = await protocol.init;
const state = {
maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
};
const callTool = createToolCaller(protocol);
const context = vm.createContext({
__codex_tool_call: async (name, input) =>
protocol.request('tool_call', {
name: String(name),
input,
}),
__codex_tool_call: callTool,
});
try {
await runModule(context, protocol, request);
await runModule(context, protocol, request, state, callTool);
await protocol.send({
type: 'result',
content_items: readContentItems(context),
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(0);
} catch (error) {
@ -275,6 +301,7 @@ async function main() {
await protocol.send({
type: 'result',
content_items: readContentItems(context),
max_output_tokens_per_exec_call: state.maxOutputTokensPerExecCall,
});
process.exit(1);
}

View file

@ -1621,7 +1621,7 @@ source: /[\s\S]+/
enabled_tool_names.join(", ")
};
let description = format!(
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `set_max_output_tokens_per_exec_call` from `@openai/code_mode` to set the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
);
ToolSpec::Freeform(FreeformTool {

View file

@ -94,6 +94,51 @@ pub(crate) fn truncate_text(content: &str, policy: TruncationPolicy) -> String {
}
}
}
/// Globally truncates the text portion of a content-item list.
///
/// All `InputText` items are merged (newline-separated) and, when the merged
/// text exceeds the policy's budget, replaced with a single truncated text
/// item followed by the original images in order. Returns the new items plus
/// `Some(approximate token count of the merged text)` when truncation
/// happened, or the items unchanged and `None` otherwise.
pub(crate) fn formatted_truncate_text_content_items_with_policy(
    items: &[FunctionCallOutputContentItem],
    policy: TruncationPolicy,
) -> (Vec<FunctionCallOutputContentItem>, Option<usize>) {
    // Collect every text segment in document order; images pass through.
    let mut text_segments: Vec<&str> = Vec::new();
    for item in items {
        if let FunctionCallOutputContentItem::InputText { text } = item {
            text_segments.push(text.as_str());
        }
    }
    if text_segments.is_empty() {
        return (items.to_vec(), None);
    }

    // Merge with '\n' separators, but only once something has been written —
    // a leading empty segment must not introduce a separator.
    let mut combined = String::new();
    for segment in &text_segments {
        if !combined.is_empty() {
            combined.push('\n');
        }
        combined.push_str(segment);
    }
    if combined.len() <= policy.byte_budget() {
        // Under budget: hand the items back untouched.
        return (items.to_vec(), None);
    }

    // One truncated text item first, then the surviving images in order.
    let mut out = Vec::with_capacity(1 + items.len());
    out.push(FunctionCallOutputContentItem::InputText {
        text: formatted_truncate_text(&combined, policy),
    });
    for item in items {
        if let FunctionCallOutputContentItem::InputImage { image_url, detail } = item {
            out.push(FunctionCallOutputContentItem::InputImage {
                image_url: image_url.clone(),
                detail: *detail,
            });
        }
    }
    (out, Some(approx_token_count(&combined)))
}
/// Globally truncate function output items to fit within the given
/// truncation policy's budget, preserving as many text/image items as
/// possible and appending a summary for any omitted text items.
@ -319,6 +364,7 @@ mod tests {
use super::TruncationPolicy;
use super::approx_token_count;
use super::formatted_truncate_text;
use super::formatted_truncate_text_content_items_with_policy;
use super::split_string;
use super::truncate_function_output_items_with_policy;
use super::truncate_text;
@ -540,4 +586,92 @@ mod tests {
};
assert!(summary_text.contains("omitted 2 text items"));
}
#[test]
fn formatted_truncate_text_content_items_with_policy_returns_original_under_limit() {
    // Three text items (including an empty one) whose combined size fits
    // comfortably inside the byte budget.
    let items: Vec<_> = ["alpha", "", "beta"]
        .into_iter()
        .map(|text| FunctionCallOutputContentItem::InputText {
            text: text.to_string(),
        })
        .collect();

    let (output, original_token_count) =
        formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Bytes(32));

    // Under the limit the items come back untouched and no count is reported.
    assert_eq!(output, items);
    assert_eq!(original_token_count, None);
}
#[test]
fn formatted_truncate_text_content_items_with_policy_merges_text_and_appends_images() {
    let text = |s: &str| FunctionCallOutputContentItem::InputText {
        text: s.to_string(),
    };
    let image = |url: &str| FunctionCallOutputContentItem::InputImage {
        image_url: url.to_string(),
        detail: None,
    };
    // Interleaved text and images; the combined text exceeds the byte budget.
    let items = vec![
        text("abcd"),
        image("img:one"),
        text("efgh"),
        text("ijkl"),
        image("img:two"),
    ];

    let (output, original_token_count) =
        formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Bytes(8));

    // All text collapses into one truncated item; images trail in order.
    assert_eq!(
        output,
        vec![
            text("Total output lines: 3\n\nabcd…6 chars truncated…ijkl"),
            image("img:one"),
            image("img:two"),
        ]
    );
    assert_eq!(original_token_count, Some(4));
}
#[test]
fn formatted_truncate_text_content_items_with_policy_merges_all_text_for_token_budget() {
    // Two text items whose merged size blows a two-token budget.
    let items: Vec<_> = ["abcdefgh", "ijklmnop"]
        .into_iter()
        .map(|s| FunctionCallOutputContentItem::InputText {
            text: s.to_string(),
        })
        .collect();

    let (output, original_token_count) =
        formatted_truncate_text_content_items_with_policy(&items, TruncationPolicy::Tokens(2));

    // Both items merge into a single truncated text item with the marker.
    assert_eq!(
        output,
        vec![FunctionCallOutputContentItem::InputText {
            text: "Total output lines: 2\n\nabcd…3 tokens truncated…mnop".to_string(),
        }]
    );
    assert_eq!(original_token_count, Some(5));
}
}

View file

@ -4,6 +4,7 @@ use anyhow::Result;
use codex_core::config::types::McpServerConfig;
use codex_core::config::types::McpServerTransportConfig;
use codex_core::features::Feature;
use core_test_support::assert_regex_match;
use core_test_support::responses;
use core_test_support::responses::ResponseMock;
use core_test_support::responses::ResponsesRequest;
@ -175,6 +176,51 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark
Ok(())
}
// Verifies model-controlled truncation of the final `code_mode` result: the
// script lowers the budget via `set_max_output_tokens_per_exec_call`, then
// produces an exec output that must come back truncated with the unified-exec
// style `Original token count:` / `Output:` wrapper.
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
// Run one code_mode turn; the JS below sets a 6-token budget and emits a
// JSON-stringified exec result far larger than that budget.
let (_test, second_mock) = run_code_mode_turn(
&server,
"use code_mode to truncate the final result",
r#"
import { exec_command } from "tools.js";
import { set_max_output_tokens_per_exec_call } from "@openai/code_mode";
set_max_output_tokens_per_exec_call(6);
add_content(JSON.stringify(await exec_command({
cmd: "printf 'token one token two token three token four token five token six token seven'",
max_output_tokens: 100
})));
"#,
false,
)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
// The tool call itself must succeed; only the payload gets truncated.
assert_ne!(
success,
Some(false),
"code_mode call failed unexpectedly: {output}"
);
// Expect the wrapper header, the merged-text line count, the start of the
// JSON payload, and a `…N tokens truncated…` marker somewhere after it.
let expected_pattern = r#"(?sx)
\A
Original\ token\ count:\ \d+\n
Output:\n
Total\ output\ lines:\ 1\n
\n
\{"chunk_id".*\d+\ tokens\ truncated.*
\z
"#;
assert_regex_match(expected_pattern, &output);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
skip_if_no_network!(Ok(()));