From a2546d5dff12e7f629ff540bb2603e7ae635748d Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Thu, 12 Mar 2026 15:43:59 -0700 Subject: [PATCH] Expose code-mode tools through globals (#14517) Summary - make all code-mode tools accessible as globals so callers only need `tools.` - rename text/image helpers and key globals (store, load, ALL_TOOLS, etc.) to reflect the new shared namespace - update the JS bridge, runners, descriptions, router, and tests to follow the new API Testing - Not run (not requested) --- codex-rs/core/src/tools/code_mode/bridge.js | 77 +++-- .../core/src/tools/code_mode/description.md | 12 +- codex-rs/core/src/tools/code_mode/mod.rs | 14 +- codex-rs/core/src/tools/code_mode/runner.cjs | 145 +++++++--- .../core/src/tools/code_mode_description.rs | 34 ++- .../src/tools/code_mode_description_tests.rs | 8 +- codex-rs/core/src/tools/parallel.rs | 5 + codex-rs/core/src/tools/router.rs | 7 + codex-rs/core/src/tools/spec_tests.rs | 4 +- codex-rs/core/tests/suite/code_mode.rs | 265 +++++++++--------- 10 files changed, 325 insertions(+), 246 deletions(-) diff --git a/codex-rs/core/src/tools/code_mode/bridge.js b/codex-rs/core/src/tools/code_mode/bridge.js index d7967faab..5989985f1 100644 --- a/codex-rs/core/src/tools/code_mode/bridge.js +++ b/codex-rs/core/src/tools/code_mode/bridge.js @@ -1,7 +1,9 @@ -const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__; const __codexContentItems = Array.isArray(globalThis.__codexContentItems) ? 
globalThis.__codexContentItems : []; +const __codexRuntime = globalThis.__codexRuntime; + +delete globalThis.__codexRuntime; Object.defineProperty(globalThis, '__codexContentItems', { value: __codexContentItems, @@ -11,53 +13,42 @@ Object.defineProperty(globalThis, '__codexContentItems', { }); (() => { - function cloneContentItem(item) { - if (!item || typeof item !== 'object') { - throw new TypeError('content item must be an object'); - } - switch (item.type) { - case 'input_text': - if (typeof item.text !== 'string') { - throw new TypeError('content item "input_text" requires a string text field'); - } - return { type: 'input_text', text: item.text }; - case 'input_image': - if (typeof item.image_url !== 'string') { - throw new TypeError('content item "input_image" requires a string image_url field'); - } - return { type: 'input_image', image_url: item.image_url }; - default: - throw new TypeError(`unsupported content item type "${item.type}"`); - } + if (!__codexRuntime || typeof __codexRuntime !== 'object') { + throw new Error('code mode runtime is unavailable'); } - function normalizeRawContentItems(value) { - if (Array.isArray(value)) { - return value.flatMap((entry) => normalizeRawContentItems(entry)); - } - return [cloneContentItem(value)]; + function defineGlobal(name, value) { + Object.defineProperty(globalThis, name, { + value, + configurable: true, + enumerable: true, + writable: false, + }); } - function normalizeContentItems(value) { - if (typeof value === 'string') { - return [{ type: 'input_text', text: value }]; - } - return normalizeRawContentItems(value); - } + defineGlobal('ALL_TOOLS', __codexRuntime.ALL_TOOLS); + defineGlobal('image', __codexRuntime.image); + defineGlobal('load', __codexRuntime.load); + defineGlobal( + 'set_max_output_tokens_per_exec_call', + __codexRuntime.set_max_output_tokens_per_exec_call + ); + defineGlobal('set_yield_time', __codexRuntime.set_yield_time); + defineGlobal('store', __codexRuntime.store); + 
defineGlobal('text', __codexRuntime.text); + defineGlobal('tools', __codexRuntime.tools); + defineGlobal('yield_control', __codexRuntime.yield_control); - globalThis.add_content = (value) => { - const contentItems = normalizeContentItems(value); - __codexContentItems.push(...contentItems); - return contentItems; - }; - - globalThis.console = Object.freeze({ - log() {}, - info() {}, - warn() {}, - error() {}, - debug() {}, - }); + defineGlobal( + 'console', + Object.freeze({ + log() {}, + info() {}, + warn() {}, + error() {}, + debug() {}, + }) + ); })(); __CODE_MODE_USER_CODE_PLACEHOLDER__ diff --git a/codex-rs/core/src/tools/code_mode/description.md b/codex-rs/core/src/tools/code_mode/description.md index 482e07afe..d5e564545 100644 --- a/codex-rs/core/src/tools/code_mode/description.md +++ b/codex-rs/core/src/tools/code_mode/description.md @@ -1,18 +1,16 @@ ## exec - Runs raw JavaScript in an isolated context (no Node, no file system, or network access, no console). - Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences. -- You have a set of tools provided to you. They are imported either from `tools.js` or `/mcp/server.js` +- All nested tools are available on the global `tools` object, for example `await tools.exec_command(...)`. Tool names are exposed as normalized JavaScript identifiers, for example `await tools.mcp__ologs__get_profile(...)`. - Tool methods take either string or object as parameter. - They return either a structured value or a string based on the description above. -- Surface text back to the model with `output_text(v: string | number | boolean | undefined | null)`. A string representation of the value is returned to the model. Manually serialize complex values. - -- Methods available in `@openai/code_mode` module: -- `output_text(value: string | number | boolean | undefined | null)`: A string representation of the value is returned to the model. Manually serialize complex values. 
-- `output_image(imageUrl: string)`: An image is returned to the model. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL. +- Global helpers: +- `text(value: string | number | boolean | undefined | null)`: Appends a text item and returns it. Non-string values are stringified with `JSON.stringify(...)` when possible. +- `image(imageUrl: string)`: Appends an image item and returns it. `imageUrl` can be an HTTPS URL or a base64-encoded `data:` URL. - `store(key: string, value: any)`: stores a serializeable value under a string key for later `exec` calls in the same session. - `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing. - +- `ALL_TOOLS`: metadata for the enabled nested tools as `{ name, description }` entries. - `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens. - `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running. - `yield_control()`: yields the accumulated output to the model immediately while the script keeps running. 
diff --git a/codex-rs/core/src/tools/code_mode/mod.rs b/codex-rs/core/src/tools/code_mode/mod.rs index 9a92a2486..ab25fd1ed 100644 --- a/codex-rs/core/src/tools/code_mode/mod.rs +++ b/codex-rs/core/src/tools/code_mode/mod.rs @@ -280,8 +280,6 @@ async fn call_nested_tool( return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself")); } - let router = build_nested_router(&exec).await; - let specs = router.specs(); let payload = if let Some((server, tool)) = exec.session.parse_mcp_tool_name(&tool_name, &None).await { match serialize_function_tool_arguments(&tool_name, input) { @@ -293,7 +291,7 @@ async fn call_nested_tool( Err(error) => return JsonValue::String(error), } } else { - match build_nested_tool_payload(&specs, &tool_name, input) { + match build_nested_tool_payload(tool_runtime.find_spec(&tool_name), &tool_name, input) { Ok(payload) => payload, Err(error) => return JsonValue::String(error), } @@ -324,22 +322,20 @@ fn tool_kind_for_spec(spec: &ToolSpec) -> protocol::CodeModeToolKind { } fn tool_kind_for_name( - specs: &[ToolSpec], + spec: Option, tool_name: &str, ) -> Result { - specs - .iter() - .find(|spec| spec.name() == tool_name) + spec.as_ref() .map(tool_kind_for_spec) .ok_or_else(|| format!("tool `{tool_name}` is not enabled in {PUBLIC_TOOL_NAME}")) } fn build_nested_tool_payload( - specs: &[ToolSpec], + spec: Option, tool_name: &str, input: Option, ) -> Result { - let actual_kind = tool_kind_for_name(specs, tool_name)?; + let actual_kind = tool_kind_for_name(spec, tool_name)?; match actual_kind { protocol::CodeModeToolKind::Function => build_function_tool_payload(tool_name, input), protocol::CodeModeToolKind::Freeform => build_freeform_tool_payload(tool_name, input), diff --git a/codex-rs/core/src/tools/code_mode/runner.cjs b/codex-rs/core/src/tools/code_mode/runner.cjs index b2002a2b7..b498650a8 100644 --- a/codex-rs/core/src/tools/code_mode/runner.cjs +++ b/codex-rs/core/src/tools/code_mode/runner.cjs @@ -131,7 +131,22 @@ function 
codeModeWorkerMain() { return contentItems; } - function createToolsNamespace(callTool, enabledTools) { + function createGlobalToolsNamespace(callTool, enabledTools) { + const tools = Object.create(null); + + for (const { tool_name, global_name } of enabledTools) { + Object.defineProperty(tools, global_name, { + value: async (args) => callTool(tool_name, args), + configurable: false, + enumerable: true, + writable: false, + }); + } + + return Object.freeze(tools); + } + + function createModuleToolsNamespace(callTool, enabledTools) { const tools = Object.create(null); for (const { tool_name, global_name } of enabledTools) { @@ -148,10 +163,9 @@ function codeModeWorkerMain() { function createAllToolsMetadata(enabledTools) { return Object.freeze( - enabledTools.map(({ module: modulePath, name, description }) => + enabledTools.map(({ global_name, description }) => Object.freeze({ - module: modulePath, - name, + name: global_name, description, }) ) @@ -159,7 +173,7 @@ function codeModeWorkerMain() { } function createToolsModule(context, callTool, enabledTools) { - const tools = createToolsNamespace(callTool, enabledTools); + const tools = createModuleToolsNamespace(callTool, enabledTools); const allTools = createAllToolsMetadata(enabledTools); const exportNames = ['ALL_TOOLS']; @@ -216,15 +230,15 @@ function codeModeWorkerMain() { function normalizeOutputImageUrl(value) { if (typeof value !== 'string' || !value) { - throw new TypeError('output_image expects a non-empty image URL string'); + throw new TypeError('image expects a non-empty image URL string'); } if (/^(?:https?:\/\/|data:)/i.test(value)) { return value; } - throw new TypeError('output_image expects an http(s) or data URL'); + throw new TypeError('image expects an http(s) or data URL'); } - function createCodeModeModule(context, state) { + function createCodeModeHelpers(context, state) { const load = (key) => { if (typeof key !== 'string') { throw new TypeError('load key must be a string'); @@ -240,7 +254,7 
@@ function codeModeWorkerMain() { } state.storedValues[key] = cloneJsonValue(value); }; - const outputText = (value) => { + const text = (value) => { const item = { type: 'input_text', text: serializeOutputText(value), @@ -248,7 +262,7 @@ function codeModeWorkerMain() { ensureContentItems(context).push(item); return item; }; - const outputImage = (value) => { + const image = (value) => { const item = { type: 'input_image', image_url: normalizeOutputImageUrl(value), @@ -256,47 +270,85 @@ function codeModeWorkerMain() { ensureContentItems(context).push(item); return item; }; + const setMaxOutputTokensPerExecCall = (value) => { + const normalized = normalizeMaxOutputTokensPerExecCall(value); + state.maxOutputTokensPerExecCall = normalized; + parentPort.postMessage({ + type: 'set_max_output_tokens_per_exec_call', + value: normalized, + }); + return normalized; + }; + const setYieldTime = (value) => { + const normalized = normalizeYieldTime(value); + parentPort.postMessage({ + type: 'set_yield_time', + value: normalized, + }); + return normalized; + }; + const yieldControl = () => { + parentPort.postMessage({ type: 'yield' }); + }; + return Object.freeze({ + image, + load, + output_image: image, + output_text: text, + set_max_output_tokens_per_exec_call: setMaxOutputTokensPerExecCall, + set_yield_time: setYieldTime, + store, + text, + yield_control: yieldControl, + }); + } + + function createCodeModeModule(context, helpers) { return new SyntheticModule( [ + 'image', 'load', 'output_text', 'output_image', 'set_max_output_tokens_per_exec_call', 'set_yield_time', 'store', + 'text', 'yield_control', ], function initCodeModeModule() { - this.setExport('load', load); - this.setExport('output_text', outputText); - this.setExport('output_image', outputImage); - this.setExport('set_max_output_tokens_per_exec_call', (value) => { - const normalized = normalizeMaxOutputTokensPerExecCall(value); - state.maxOutputTokensPerExecCall = normalized; - parentPort.postMessage({ - type: 
'set_max_output_tokens_per_exec_call', - value: normalized, - }); - return normalized; - }); - this.setExport('set_yield_time', (value) => { - const normalized = normalizeYieldTime(value); - parentPort.postMessage({ - type: 'set_yield_time', - value: normalized, - }); - return normalized; - }); - this.setExport('store', store); - this.setExport('yield_control', () => { - parentPort.postMessage({ type: 'yield' }); - }); + this.setExport('image', helpers.image); + this.setExport('load', helpers.load); + this.setExport('output_text', helpers.output_text); + this.setExport('output_image', helpers.output_image); + this.setExport( + 'set_max_output_tokens_per_exec_call', + helpers.set_max_output_tokens_per_exec_call + ); + this.setExport('set_yield_time', helpers.set_yield_time); + this.setExport('store', helpers.store); + this.setExport('text', helpers.text); + this.setExport('yield_control', helpers.yield_control); }, { context } ); } + function createBridgeRuntime(callTool, enabledTools, helpers) { + return Object.freeze({ + ALL_TOOLS: createAllToolsMetadata(enabledTools), + image: helpers.image, + load: helpers.load, + set_max_output_tokens_per_exec_call: helpers.set_max_output_tokens_per_exec_call, + set_yield_time: helpers.set_yield_time, + store: helpers.store, + text: helpers.text, + tools: createGlobalToolsNamespace(callTool, enabledTools), + yield_control: helpers.yield_control, + }); + } + function namespacesMatch(left, right) { if (left.length !== right.length) { return false; @@ -347,16 +399,18 @@ function codeModeWorkerMain() { ); } - function createModuleResolver(context, callTool, enabledTools, state) { - const toolsModule = createToolsModule(context, callTool, enabledTools); - const codeModeModule = createCodeModeModule(context, state); + function createModuleResolver(context, callTool, enabledTools, helpers) { + let toolsModule; + let codeModeModule; const namespacedModules = new Map(); return function resolveModule(specifier) { if (specifier === 
'tools.js') { + toolsModule ??= createToolsModule(context, callTool, enabledTools); return toolsModule; } if (specifier === '@openai/code_mode' || specifier === 'openai/code_mode') { + codeModeModule ??= createCodeModeModule(context, helpers); return codeModeModule; } const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier); @@ -400,12 +454,12 @@ function codeModeWorkerMain() { return module; } - async function runModule(context, start, state, callTool) { + async function runModule(context, start, callTool, helpers) { const resolveModule = createModuleResolver( context, callTool, start.enabled_tools ?? [], - state + helpers ); const mainModule = new SourceTextModule(start.source, { context, @@ -425,12 +479,21 @@ function codeModeWorkerMain() { storedValues: cloneJsonValue(start.stored_values ?? {}), }; const callTool = createToolCaller(); + const enabledTools = start.enabled_tools ?? []; + const contentItems = createContentItems(); const context = vm.createContext({ - __codexContentItems: createContentItems(), + __codexContentItems: contentItems, + }); + const helpers = createCodeModeHelpers(context, state); + Object.defineProperty(context, '__codexRuntime', { + value: createBridgeRuntime(callTool, enabledTools, helpers), + configurable: true, + enumerable: false, + writable: false, }); try { - await runModule(context, start, state, callTool); + await runModule(context, start, callTool, helpers); parentPort.postMessage({ type: 'result', stored_values: state.storedValues, diff --git a/codex-rs/core/src/tools/code_mode_description.rs b/codex-rs/core/src/tools/code_mode_description.rs index 318e6f495..c5657fcac 100644 --- a/codex-rs/core/src/tools/code_mode_description.rs +++ b/codex-rs/core/src/tools/code_mode_description.rs @@ -74,15 +74,41 @@ fn append_code_mode_sample( input_type: String, output_type: String, ) -> String { - let reference = code_mode_tool_reference(tool_name); - let local_name = normalize_code_mode_identifier(&reference.tool_key); let 
declaration = format!( - "import {{ {local_name} }} from \"{}\";\ndeclare function {local_name}({input_name}: {input_type}): Promise<{output_type}>;", - reference.module_path + "declare const tools: {{\n {}\n}};", + render_code_mode_tool_declaration(tool_name, input_name, input_type, output_type) ); format!("{description}\n\nCode mode declaration:\n```ts\n{declaration}\n```") } +fn render_code_mode_tool_declaration( + tool_name: &str, + input_name: &str, + input_type: String, + output_type: String, +) -> String { + let input_type = indent_multiline_type(&input_type, 2); + let output_type = indent_multiline_type(&output_type, 2); + let tool_name = normalize_code_mode_identifier(tool_name); + format!("{tool_name}({input_name}: {input_type}): Promise<{output_type}>;") +} + +fn indent_multiline_type(type_name: &str, spaces: usize) -> String { + let indent = " ".repeat(spaces); + type_name + .lines() + .enumerate() + .map(|(index, line)| { + if index == 0 { + line.to_string() + } else { + format!("{indent}{line}") + } + }) + .collect::>() + .join("\n") +} + pub(crate) fn normalize_code_mode_identifier(tool_key: &str) -> String { let mut identifier = String::new(); diff --git a/codex-rs/core/src/tools/code_mode_description_tests.rs b/codex-rs/core/src/tools/code_mode_description_tests.rs index f5b4f8820..d014fc409 100644 --- a/codex-rs/core/src/tools/code_mode_description_tests.rs +++ b/codex-rs/core/src/tools/code_mode_description_tests.rs @@ -76,7 +76,7 @@ fn render_json_schema_to_typescript_sorts_object_properties() { } #[test] -fn append_code_mode_sample_uses_static_import_for_valid_identifiers() { +fn append_code_mode_sample_uses_global_tools_for_valid_identifiers() { assert_eq!( append_code_mode_sample( "desc", @@ -85,12 +85,12 @@ fn append_code_mode_sample_uses_static_import_for_valid_identifiers() { "{ foo: string }".to_string(), "unknown".to_string(), ), - "desc\n\nCode mode declaration:\n```ts\nimport { get_profile } from \"tools/mcp/ologs.js\";\ndeclare 
function get_profile(args: { foo: string }): Promise;\n```" + "desc\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__ologs__get_profile(args: { foo: string }): Promise;\n};\n```" ); } #[test] -fn append_code_mode_sample_normalizes_non_identifier_tool_names() { +fn append_code_mode_sample_normalizes_invalid_identifiers() { assert_eq!( append_code_mode_sample( "desc", @@ -99,6 +99,6 @@ fn append_code_mode_sample_normalizes_non_identifier_tool_names() { "{ foo: string }".to_string(), "unknown".to_string(), ), - "desc\n\nCode mode declaration:\n```ts\nimport { echo_tool } from \"tools/mcp/rmcp.js\";\ndeclare function echo_tool(args: { foo: string }): Promise;\n```" + "desc\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__rmcp__echo_tool(args: { foo: string }): Promise;\n};\n```" ); } diff --git a/codex-rs/core/src/tools/parallel.rs b/codex-rs/core/src/tools/parallel.rs index 5f49ccffe..be7a28ed7 100644 --- a/codex-rs/core/src/tools/parallel.rs +++ b/codex-rs/core/src/tools/parallel.rs @@ -9,6 +9,7 @@ use tracing::Instrument; use tracing::instrument; use tracing::trace_span; +use crate::client_common::tools::ToolSpec; use crate::codex::Session; use crate::codex::TurnContext; use crate::error::CodexErr; @@ -46,6 +47,10 @@ impl ToolCallRuntime { } } + pub(crate) fn find_spec(&self, tool_name: &str) -> Option { + self.router.find_spec(tool_name) + } + #[instrument(level = "trace", skip_all)] pub(crate) fn handle_tool_call( self, diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs index 9d8381c62..e211d83ce 100644 --- a/codex-rs/core/src/tools/router.rs +++ b/codex-rs/core/src/tools/router.rs @@ -75,6 +75,13 @@ impl ToolRouter { .collect() } + pub fn find_spec(&self, tool_name: &str) -> Option { + self.specs + .iter() + .find(|config| config.spec.name() == tool_name) + .map(|config| config.spec.clone()) + } + pub fn tool_supports_parallel(&self, tool_name: &str) -> bool { self.specs .iter() diff --git 
a/codex-rs/core/src/tools/spec_tests.rs b/codex-rs/core/src/tools/spec_tests.rs index 6b90f1f1c..4d4bcaad7 100644 --- a/codex-rs/core/src/tools/spec_tests.rs +++ b/codex-rs/core/src/tools/spec_tests.rs @@ -2443,7 +2443,7 @@ fn code_mode_augments_builtin_tool_descriptions_with_typed_sample() { assert_eq!( description, - "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nCode mode declaration:\n```ts\nimport { view_image } from \"tools.js\";\ndeclare function view_image(args: {\n path: string;\n}): Promise;\n```" + "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n view_image(args: {\n path: string;\n }): Promise;\n};\n```" ); } @@ -2495,7 +2495,7 @@ fn code_mode_augments_mcp_tool_descriptions_with_namespaced_sample() { assert_eq!( description, - "Echo text\n\nCode mode declaration:\n```ts\nimport { echo } from \"tools/mcp/sample.js\";\ndeclare function echo(args: {\n message: string;\n}): Promise<{\n _meta?: unknown;\n content: Array;\n isError?: boolean;\n structuredContent?: unknown;\n}>;\n```" + "Echo text\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__sample__echo(args: {\n message: string;\n }): Promise<{\n _meta?: unknown;\n content: Array;\n isError?: boolean;\n structuredContent?: unknown;\n }>;\n};\n```" ); } diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index a427a5bc9..c6fc3dea9 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -63,7 +63,7 @@ fn wait_for_file_source(path: &Path) -> Result { let quoted_path = shlex::try_join([path.to_string_lossy().as_ref()])?; let command = format!("if [ -f {quoted_path} ]; then printf ready; fi"); Ok(format!( - r#"while ((await 
exec_command({{ cmd: {command:?} }})).output !== "ready") {{ + r#"while ((await tools.exec_command({{ cmd: {command:?} }})).output !== "ready") {{ }}"# )) } @@ -197,9 +197,7 @@ async fn code_mode_can_return_exec_command_output() -> Result<()> { &server, "use exec to run exec_command", r#" -import { exec_command } from "tools.js"; - -add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_marker" }))); +text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marker" }))); "#, false, ) @@ -239,9 +237,29 @@ async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> { skip_if_no_network!(Ok(())); let server = responses::start_mock_server().await; - let code = r#" -import { test_sync_tool } from "tools.js"; + let mut builder = test_codex() + .with_model("test-gpt-5.1-codex") + .with_config(move |config| { + let _ = config.features.enable(Feature::CodeMode); + }); + let test = builder.build(&server).await?; + let warmup_code = r#" +const args = { + sleep_after_ms: 10, + barrier: { + id: "code-mode-parallel-tools-warmup", + participants: 2, + timeout_ms: 1_000, + }, +}; + +await Promise.all([ + tools.test_sync_tool(args), + tools.test_sync_tool(args), +]); +"#; + let code = r#" const args = { sleep_after_ms: 300, barrier: { @@ -252,16 +270,42 @@ const args = { }; const results = await Promise.all([ - test_sync_tool(args), - test_sync_tool(args), + tools.test_sync_tool(args), + tools.test_sync_tool(args), ]); -add_content(JSON.stringify(results)); +text(JSON.stringify(results)); "#; + let response_mock = responses::mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-warm-1"), + ev_custom_tool_call("call-warm-1", "exec", warmup_code), + ev_completed("resp-warm-1"), + ]), + sse(vec![ + ev_assistant_message("msg-warm-1", "warmup done"), + ev_completed("resp-warm-2"), + ]), + sse(vec![ + ev_response_created("resp-1"), + ev_custom_tool_call("call-1", "exec", code), + ev_completed("resp-1"), + ]), + 
sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ], + ) + .await; + + test.submit_turn("warm up nested tools in parallel").await?; + let start = Instant::now(); - let (_test, second_mock) = - run_code_mode_turn(&server, "run nested tools in parallel", code, false).await?; + test.submit_turn("run nested tools in parallel").await?; let duration = start.elapsed(); assert!( @@ -269,7 +313,9 @@ add_content(JSON.stringify(results)); "expected nested tools to finish in parallel, got {duration:?}", ); - let req = second_mock.single_request(); + let req = response_mock + .last_request() + .expect("parallel code mode run should send a completion request"); let items = custom_tool_output_items(&req, "call-1"); assert_eq!(items.len(), 2); assert_eq!(text_item(&items, 1), "[\"ok\",\"ok\"]"); @@ -287,12 +333,9 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result< &server, "use exec to truncate the final result", r#" -import { exec_command } from "tools.js"; -import { set_max_output_tokens_per_exec_call } from "@openai/code_mode"; - set_max_output_tokens_per_exec_call(6); -add_content(JSON.stringify(await exec_command({ +text(JSON.stringify(await tools.exec_command({ cmd: "printf 'token one token two token three token four token five token six token seven'", max_output_tokens: 100 }))); @@ -332,8 +375,8 @@ async fn code_mode_returns_accumulated_output_when_script_fails() -> Result<()> &server, "use code_mode to surface script failures", r#" -add_content("before crash"); -add_content("still before crash"); +text("before crash"); +text("still before crash"); throw new Error("boom"); "#, false, @@ -383,15 +426,12 @@ async fn code_mode_can_yield_and_resume_with_exec_wait() -> Result<()> { let code = format!( r#" -import {{ output_text, set_yield_time }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("phase 1"); +text("phase 1"); set_yield_time(10); {phase_2_wait} 
-output_text("phase 2"); +text("phase 2"); {phase_3_wait} -output_text("phase 3"); +text("phase 3"); "# ); @@ -527,9 +567,7 @@ async fn code_mode_yield_timeout_works_for_busy_loop() -> Result<()> { let test = builder.build(&server).await?; let code = r#" -import { output_text, set_yield_time } from "@openai/code_mode"; - -output_text("phase 1"); +text("phase 1"); set_yield_time(10); while (true) {} "#; @@ -629,24 +667,18 @@ async fn code_mode_can_run_multiple_yielded_sessions() -> Result<()> { let session_a_code = format!( r#" -import {{ output_text, set_yield_time }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("session a start"); +text("session a start"); set_yield_time(10); {session_a_wait} -output_text("session a done"); +text("session a done"); "# ); let session_b_code = format!( r#" -import {{ output_text, set_yield_time }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("session b start"); +text("session b start"); set_yield_time(10); {session_b_wait} -output_text("session b done"); +text("session b done"); "# ); @@ -801,13 +833,10 @@ async fn code_mode_exec_wait_can_terminate_and_continue() -> Result<()> { let code = format!( r#" -import {{ output_text, set_yield_time }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("phase 1"); +text("phase 1"); set_yield_time(10); {termination_wait} -output_text("phase 2"); +text("phase 2"); "# ); @@ -883,9 +912,7 @@ output_text("phase 2"); "call-3", "exec", r#" -import { output_text } from "@openai/code_mode"; - -output_text("after terminate"); +text("after terminate"); "#, ), ev_completed("resp-5"), @@ -1000,25 +1027,19 @@ async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_ let session_a_code = format!( r#" -import {{ output_text, set_yield_time }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("session a start"); +text("session a start"); 
set_yield_time(10); {session_a_wait} -output_text("session a done"); -await exec_command({{ cmd: {session_a_done_command:?} }}); +text("session a done"); +await tools.exec_command({{ cmd: {session_a_done_command:?} }}); "# ); let session_b_code = format!( r#" -import {{ output_text, set_yield_time }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("session b start"); +text("session b start"); set_yield_time(10); {session_b_wait} -output_text("session b done"); +text("session b done"); "# ); @@ -1197,13 +1218,10 @@ async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() -> format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready"); let code = format!( r#" -import {{ yield_control, output_text }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("before yield"); +text("before yield"); yield_control(); -await exec_command({{ cmd: {write_file_command:?} }}); -output_text("after yield"); +await tools.exec_command({{ cmd: {write_file_command:?} }}); +text("after yield"); "# ); @@ -1291,14 +1309,11 @@ async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> { let code = format!( r#" -import {{ output_text, set_max_output_tokens_per_exec_call, set_yield_time }} from "@openai/code_mode"; -import {{ exec_command }} from "tools.js"; - -output_text("phase 1"); +text("phase 1"); set_max_output_tokens_per_exec_call(100); set_yield_time(10); {completion_wait} -output_text("token one token two token three token four token five token six token seven"); +text("token one token two token three token four token five token six token seven"); "# ); @@ -1380,7 +1395,7 @@ Total\ output\ lines:\ 1\n } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> Result<()> { +async fn code_mode_can_output_serialized_text_via_global_helper() -> Result<()> { 
skip_if_no_network!(Ok(())); let server = responses::start_mock_server().await; @@ -1388,9 +1403,7 @@ async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> R &server, "use exec to return structured text", r#" -import { output_text } from "@openai/code_mode"; - -output_text({ json: true }); +text({ json: true }); "#, false, ) @@ -1409,7 +1422,7 @@ output_text({ json: true }); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn code_mode_surfaces_output_text_stringify_errors() -> Result<()> { +async fn code_mode_surfaces_text_stringify_errors() -> Result<()> { skip_if_no_network!(Ok(())); let server = responses::start_mock_server().await; @@ -1417,11 +1430,9 @@ async fn code_mode_surfaces_output_text_stringify_errors() -> Result<()> { &server, "use exec to return circular text", r#" -import { output_text } from "@openai/code_mode"; - const circular = {}; circular.self = circular; -output_text(circular); +text(circular); "#, false, ) @@ -1452,7 +1463,7 @@ output_text(circular); } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn code_mode_can_output_images_via_openai_code_mode_module() -> Result<()> { +async fn code_mode_can_output_images_via_global_helper() -> Result<()> { skip_if_no_network!(Ok(())); let server = responses::start_mock_server().await; @@ -1460,10 +1471,8 @@ async fn code_mode_can_output_images_via_openai_code_mode_module() -> Result<()> &server, "use exec to return images", r#" -import { output_image } from "@openai/code_mode"; - -output_image("https://example.com/image.jpg"); -output_image("data:image/png;base64,AAA"); +image("https://example.com/image.jpg"); +image("data:image/png;base64,AAA"); "#, false, ) @@ -1512,9 +1521,7 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> { let patch = format!( "*** Begin Patch\n*** Add File: {file_name}\n+hello from code_mode\n*** End Patch\n" ); - let code = format!( - "import {{ apply_patch }} from \"tools.js\";\nconst items 
= await apply_patch({patch:?});\nadd_content(items);\n" - ); + let code = format!("text(await tools.apply_patch({patch:?}));\n"); let (test, second_mock) = run_code_mode_turn(&server, "use exec to run apply_patch", &code, true).await?; @@ -1550,12 +1557,10 @@ async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> { let server = responses::start_mock_server().await; let code = r#" -import { echo } from "tools/mcp/rmcp.js"; - -const { content, structuredContent, isError } = await echo({ +const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({ message: "ping", }); -add_content( +text( `echo=${structuredContent?.echo ?? "missing"}\n` + `env=${structuredContent?.env ?? "missing"}\n` + `isError=${String(isError)}\n` + @@ -1585,37 +1590,33 @@ contentLength=0" } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn code_mode_can_dynamically_import_namespaced_mcp_tools() -> Result<()> { +async fn code_mode_exposes_mcp_tools_on_global_tools_object() -> Result<()> { skip_if_no_network!(Ok(())); let server = responses::start_mock_server().await; let code = r#" -const rmcp = await import("tools/mcp/rmcp.js"); -const { content, structuredContent, isError } = await rmcp.echo({ +const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({ message: "ping", }); -add_content( - `hasEcho=${String(Object.keys(rmcp).includes("echo"))}\n` + - `echoType=${typeof rmcp.echo}\n` + +text( + `hasEcho=${String(Object.keys(tools).includes("mcp__rmcp__echo"))}\n` + + `echoType=${typeof tools.mcp__rmcp__echo}\n` + `echo=${structuredContent?.echo ?? 
"missing"}\n` + `isError=${String(isError)}\n` + `contentLength=${content.length}` ); "#; - let (_test, second_mock) = run_code_mode_turn_with_rmcp( - &server, - "use exec to dynamically import the rmcp module", - code, - ) - .await?; + let (_test, second_mock) = + run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code) + .await?; let req = second_mock.single_request(); let (output, success) = custom_tool_output_body_and_success(&req, "call-1"); assert_ne!( success, Some(false), - "exec dynamic rmcp import failed unexpectedly: {output}" + "exec global rmcp access failed unexpectedly: {output}" ); assert_eq!( output, @@ -1630,20 +1631,18 @@ contentLength=0" } #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn code_mode_normalizes_illegal_namespaced_mcp_tool_identifiers() -> Result<()> { +async fn code_mode_exposes_normalized_illegal_mcp_tool_names() -> Result<()> { skip_if_no_network!(Ok(())); let server = responses::start_mock_server().await; let code = r#" -import { echo_tool } from "tools/mcp/rmcp.js"; - -const result = await echo_tool({ message: "ping" }); -add_content(`echo=${result.structuredContent.echo}`); +const result = await tools.mcp__rmcp__echo_tool({ message: "ping" }); +text(`echo=${result.structuredContent.echo}`); "#; let (_test, second_mock) = run_code_mode_turn_with_rmcp( &server, - "use exec to import a normalized rmcp tool name", + "use exec to call a normalized rmcp tool name", code, ) .await?; @@ -1653,7 +1652,7 @@ add_content(`echo=${result.structuredContent.echo}`); assert_ne!( success, Some(false), - "exec normalized rmcp import failed unexpectedly: {output}" + "exec normalized rmcp tool call failed unexpectedly: {output}" ); assert_eq!(output, "echo=ECHOING: ping"); @@ -1666,7 +1665,7 @@ async fn code_mode_lists_global_scope_items() -> Result<()> { let server = responses::start_mock_server().await; let code = r#" 
-add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort())); +text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort())); "#; let (_test, second_mock) = @@ -1683,6 +1682,7 @@ add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort())); let globals = globals.into_iter().collect::>(); let expected = [ "AggregateError", + "ALL_TOOLS", "Array", "ArrayBuffer", "AsyncDisposableStack", @@ -1736,7 +1736,6 @@ add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort())); "WeakSet", "WebAssembly", "__codexContentItems", - "add_content", "console", "decodeURI", "decodeURIComponent", @@ -1745,12 +1744,20 @@ add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort())); "escape", "eval", "globalThis", + "image", "isFinite", "isNaN", + "load", "parseFloat", "parseInt", + "set_max_output_tokens_per_exec_call", + "set_yield_time", + "store", + "text", + "tools", "undefined", "unescape", + "yield_control", ]; for g in &globals { assert!( @@ -1768,10 +1775,8 @@ async fn code_mode_exports_all_tools_metadata_for_builtin_tools() -> Result<()> let server = responses::start_mock_server().await; let code = r#" -import { ALL_TOOLS } from "tools.js"; - -const tool = ALL_TOOLS.find(({ module, name }) => module === "tools.js" && name === "view_image"); -add_content(JSON.stringify(tool)); +const tool = ALL_TOOLS.find(({ name }) => name === "view_image"); +text(JSON.stringify(tool)); "#; let (_test, second_mock) = @@ -1789,9 +1794,8 @@ add_content(JSON.stringify(tool)); assert_eq!( parsed, serde_json::json!({ - "module": "tools.js", "name": "view_image", - "description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nCode mode declaration:\n```ts\nimport { view_image } from \"tools.js\";\ndeclare function view_image(args: {\n path: string;\n}): Promise;\n```", + "description": "View a local image from the 
filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within tags).\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n view_image(args: {\n path: string;\n }): Promise;\n};\n```", }) ); @@ -1804,12 +1808,10 @@ async fn code_mode_exports_all_tools_metadata_for_namespaced_mcp_tools() -> Resu let server = responses::start_mock_server().await; let code = r#" -import { ALL_TOOLS } from "tools.js"; - const tool = ALL_TOOLS.find( - ({ module, name }) => module === "tools/mcp/rmcp.js" && name === "echo" + ({ name }) => name === "mcp__rmcp__echo" ); -add_content(JSON.stringify(tool)); +text(JSON.stringify(tool)); "#; let (_test, second_mock) = @@ -1827,9 +1829,8 @@ add_content(JSON.stringify(tool)); assert_eq!( parsed, serde_json::json!({ - "module": "tools/mcp/rmcp.js", - "name": "echo", - "description": "Echo back the provided message and include environment data.\n\nCode mode declaration:\n```ts\nimport { echo } from \"tools/mcp/rmcp.js\";\ndeclare function echo(args: {\n env_var?: string;\n message: string;\n}): Promise<{\n _meta?: unknown;\n content: Array;\n isError?: boolean;\n structuredContent?: unknown;\n}>;\n```", + "name": "mcp__rmcp__echo", + "description": "Echo back the provided message and include environment data.\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__rmcp__echo(args: {\n env_var?: string;\n message: string;\n }): Promise<{\n _meta?: unknown;\n content: Array;\n isError?: boolean;\n structuredContent?: unknown;\n }>;\n};\n```", }) ); @@ -1842,13 +1843,11 @@ async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()> let server = responses::start_mock_server().await; let code = r#" -import { image_scenario } from "tools/mcp/rmcp.js"; - -const { content, structuredContent, isError } = await image_scenario({ +const { content, structuredContent, isError } = await tools.mcp__rmcp__image_scenario({ scenario: "text_only", caption: "caption from 
mcp", }); -add_content( +text( `firstType=${content[0]?.type ?? "missing"}\n` + `firstText=${content[0]?.text ?? "missing"}\n` + `structuredContent=${String(structuredContent ?? null)}\n` + @@ -1887,13 +1886,11 @@ async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> { let server = responses::start_mock_server().await; let code = r#" -import { echo } from "tools/mcp/rmcp.js"; - -const { content, structuredContent, isError } = await echo({}); +const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({}); const firstText = content[0]?.text ?? ""; const mentionsMissingMessage = firstText.includes("missing field") && firstText.includes("message"); -add_content( +text( `isError=${String(isError)}\n` + `contentLength=${content.length}\n` + `mentionsMissingMessage=${String(mentionsMissingMessage)}\n` + @@ -1939,10 +1936,8 @@ async fn code_mode_can_store_and_load_values_across_turns() -> Result<()> { "call-1", "exec", r#" -import { store } from "@openai/code_mode"; - store("nb", { title: "Notebook", items: [1, true, null] }); -add_content("stored"); +text("stored"); "#, ), ev_completed("resp-1"), @@ -1978,9 +1973,7 @@ add_content("stored"); "call-2", "exec", r#" -import { load } from "openai/code_mode"; - -add_content(JSON.stringify(load("nb"))); +text(JSON.stringify(load("nb"))); "#, ), ev_completed("resp-3"),