Expose code-mode tools through globals (#14517)

Summary
- make all code-mode tools accessible as globals so callers only need
`tools.<name>`
- rename text/image helpers and key globals (store, load, ALL_TOOLS,
etc.) to reflect the new shared namespace
- update the JS bridge, runners, descriptions, router, and tests to
follow the new API

Testing
- Not run (not requested)
This commit is contained in:
pakrym-oai 2026-03-12 15:43:59 -07:00 committed by GitHub
parent b560494c9f
commit a2546d5dff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 325 additions and 246 deletions

View file

@ -1,7 +1,9 @@
const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__;
const __codexContentItems = Array.isArray(globalThis.__codexContentItems)
? globalThis.__codexContentItems
: [];
const __codexRuntime = globalThis.__codexRuntime;
delete globalThis.__codexRuntime;
Object.defineProperty(globalThis, '__codexContentItems', {
value: __codexContentItems,
@ -11,53 +13,42 @@ Object.defineProperty(globalThis, '__codexContentItems', {
});
(() => {
function cloneContentItem(item) {
if (!item || typeof item !== 'object') {
throw new TypeError('content item must be an object');
}
switch (item.type) {
case 'input_text':
if (typeof item.text !== 'string') {
throw new TypeError('content item "input_text" requires a string text field');
}
return { type: 'input_text', text: item.text };
case 'input_image':
if (typeof item.image_url !== 'string') {
throw new TypeError('content item "input_image" requires a string image_url field');
}
return { type: 'input_image', image_url: item.image_url };
default:
throw new TypeError(`unsupported content item type "${item.type}"`);
}
if (!__codexRuntime || typeof __codexRuntime !== 'object') {
throw new Error('code mode runtime is unavailable');
}
function normalizeRawContentItems(value) {
if (Array.isArray(value)) {
return value.flatMap((entry) => normalizeRawContentItems(entry));
}
return [cloneContentItem(value)];
function defineGlobal(name, value) {
Object.defineProperty(globalThis, name, {
value,
configurable: true,
enumerable: true,
writable: false,
});
}
function normalizeContentItems(value) {
if (typeof value === 'string') {
return [{ type: 'input_text', text: value }];
}
return normalizeRawContentItems(value);
}
defineGlobal('ALL_TOOLS', __codexRuntime.ALL_TOOLS);
defineGlobal('image', __codexRuntime.image);
defineGlobal('load', __codexRuntime.load);
defineGlobal(
'set_max_output_tokens_per_exec_call',
__codexRuntime.set_max_output_tokens_per_exec_call
);
defineGlobal('set_yield_time', __codexRuntime.set_yield_time);
defineGlobal('store', __codexRuntime.store);
defineGlobal('text', __codexRuntime.text);
defineGlobal('tools', __codexRuntime.tools);
defineGlobal('yield_control', __codexRuntime.yield_control);
globalThis.add_content = (value) => {
const contentItems = normalizeContentItems(value);
__codexContentItems.push(...contentItems);
return contentItems;
};
globalThis.console = Object.freeze({
log() {},
info() {},
warn() {},
error() {},
debug() {},
});
defineGlobal(
'console',
Object.freeze({
log() {},
info() {},
warn() {},
error() {},
debug() {},
})
);
})();
__CODE_MODE_USER_CODE_PLACEHOLDER__

View file

@ -1,18 +1,16 @@
## exec
- Runs raw JavaScript in an isolated context (no Node, no file system or network access, no console).
- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences.
- You have a set of tools provided to you. They are imported either from `tools.js` or `/mcp/server.js`
- All nested tools are available on the global `tools` object, for example `await tools.exec_command(...)`. Tool names are exposed as normalized JavaScript identifiers, for example `await tools.mcp__ologs__get_profile(...)`.
- Tool methods take either a string or an object as their parameter.
- They return either a structured value or a string based on the description above.
- Surface text back to the model with `output_text(v: string | number | boolean | undefined | null)`. A string representation of the value is returned to the model. Manually serialize complex values.
- Methods available in `@openai/code_mode` module:
- `output_text(value: string | number | boolean | undefined | null)`: A string representation of the value is returned to the model. Manually serialize complex values.
- `output_image(imageUrl: string)`: An image is returned to the model. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL.
- Global helpers:
- `text(value: string | number | boolean | undefined | null)`: Appends a text item and returns it. Non-string values are stringified with `JSON.stringify(...)` when possible.
- `image(imageUrl: string)`: Appends an image item and returns it. `image_url` can be an HTTPS URL or a base64-encoded `data:` URL.
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
- `ALL_TOOLS`: metadata for the enabled nested tools as `{ name, description }` entries.
- `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
- `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running.
- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running.

View file

@ -280,8 +280,6 @@ async fn call_nested_tool(
return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself"));
}
let router = build_nested_router(&exec).await;
let specs = router.specs();
let payload =
if let Some((server, tool)) = exec.session.parse_mcp_tool_name(&tool_name, &None).await {
match serialize_function_tool_arguments(&tool_name, input) {
@ -293,7 +291,7 @@ async fn call_nested_tool(
Err(error) => return JsonValue::String(error),
}
} else {
match build_nested_tool_payload(&specs, &tool_name, input) {
match build_nested_tool_payload(tool_runtime.find_spec(&tool_name), &tool_name, input) {
Ok(payload) => payload,
Err(error) => return JsonValue::String(error),
}
@ -324,22 +322,20 @@ fn tool_kind_for_spec(spec: &ToolSpec) -> protocol::CodeModeToolKind {
}
fn tool_kind_for_name(
specs: &[ToolSpec],
spec: Option<ToolSpec>,
tool_name: &str,
) -> Result<protocol::CodeModeToolKind, String> {
specs
.iter()
.find(|spec| spec.name() == tool_name)
spec.as_ref()
.map(tool_kind_for_spec)
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in {PUBLIC_TOOL_NAME}"))
}
fn build_nested_tool_payload(
specs: &[ToolSpec],
spec: Option<ToolSpec>,
tool_name: &str,
input: Option<JsonValue>,
) -> Result<ToolPayload, String> {
let actual_kind = tool_kind_for_name(specs, tool_name)?;
let actual_kind = tool_kind_for_name(spec, tool_name)?;
match actual_kind {
protocol::CodeModeToolKind::Function => build_function_tool_payload(tool_name, input),
protocol::CodeModeToolKind::Freeform => build_freeform_tool_payload(tool_name, input),

View file

@ -131,7 +131,22 @@ function codeModeWorkerMain() {
return contentItems;
}
function createToolsNamespace(callTool, enabledTools) {
// Builds the frozen, null-prototype `tools` namespace exposed on globalThis:
// one read-only async wrapper per enabled tool that forwards to callTool
// under the tool's original (non-normalized) name.
function createGlobalToolsNamespace(callTool, enabledTools) {
  const namespace = Object.create(null);
  enabledTools.forEach(({ tool_name, global_name }) => {
    const invoke = async (args) => callTool(tool_name, args);
    Object.defineProperty(namespace, global_name, {
      value: invoke,
      configurable: false,
      enumerable: true,
      writable: false,
    });
  });
  return Object.freeze(namespace);
}
function createModuleToolsNamespace(callTool, enabledTools) {
const tools = Object.create(null);
for (const { tool_name, global_name } of enabledTools) {
@ -148,10 +163,9 @@ function codeModeWorkerMain() {
function createAllToolsMetadata(enabledTools) {
return Object.freeze(
enabledTools.map(({ module: modulePath, name, description }) =>
enabledTools.map(({ global_name, description }) =>
Object.freeze({
module: modulePath,
name,
name: global_name,
description,
})
)
@ -159,7 +173,7 @@ function codeModeWorkerMain() {
}
function createToolsModule(context, callTool, enabledTools) {
const tools = createToolsNamespace(callTool, enabledTools);
const tools = createModuleToolsNamespace(callTool, enabledTools);
const allTools = createAllToolsMetadata(enabledTools);
const exportNames = ['ALL_TOOLS'];
@ -216,15 +230,15 @@ function codeModeWorkerMain() {
function normalizeOutputImageUrl(value) {
if (typeof value !== 'string' || !value) {
throw new TypeError('output_image expects a non-empty image URL string');
throw new TypeError('image expects a non-empty image URL string');
}
if (/^(?:https?:\/\/|data:)/i.test(value)) {
return value;
}
throw new TypeError('output_image expects an http(s) or data URL');
throw new TypeError('image expects an http(s) or data URL');
}
function createCodeModeModule(context, state) {
function createCodeModeHelpers(context, state) {
const load = (key) => {
if (typeof key !== 'string') {
throw new TypeError('load key must be a string');
@ -240,7 +254,7 @@ function codeModeWorkerMain() {
}
state.storedValues[key] = cloneJsonValue(value);
};
const outputText = (value) => {
const text = (value) => {
const item = {
type: 'input_text',
text: serializeOutputText(value),
@ -248,7 +262,7 @@ function codeModeWorkerMain() {
ensureContentItems(context).push(item);
return item;
};
const outputImage = (value) => {
const image = (value) => {
const item = {
type: 'input_image',
image_url: normalizeOutputImageUrl(value),
@ -256,47 +270,85 @@ function codeModeWorkerMain() {
ensureContentItems(context).push(item);
return item;
};
const setMaxOutputTokensPerExecCall = (value) => {
const normalized = normalizeMaxOutputTokensPerExecCall(value);
state.maxOutputTokensPerExecCall = normalized;
parentPort.postMessage({
type: 'set_max_output_tokens_per_exec_call',
value: normalized,
});
return normalized;
};
const setYieldTime = (value) => {
const normalized = normalizeYieldTime(value);
parentPort.postMessage({
type: 'set_yield_time',
value: normalized,
});
return normalized;
};
const yieldControl = () => {
parentPort.postMessage({ type: 'yield' });
};
return Object.freeze({
image,
load,
output_image: image,
output_text: text,
set_max_output_tokens_per_exec_call: setMaxOutputTokensPerExecCall,
set_yield_time: setYieldTime,
store,
text,
yield_control: yieldControl,
});
}
function createCodeModeModule(context, helpers) {
return new SyntheticModule(
[
'image',
'load',
'output_text',
'output_image',
'set_max_output_tokens_per_exec_call',
'set_yield_time',
'store',
'text',
'yield_control',
],
function initCodeModeModule() {
this.setExport('load', load);
this.setExport('output_text', outputText);
this.setExport('output_image', outputImage);
this.setExport('set_max_output_tokens_per_exec_call', (value) => {
const normalized = normalizeMaxOutputTokensPerExecCall(value);
state.maxOutputTokensPerExecCall = normalized;
parentPort.postMessage({
type: 'set_max_output_tokens_per_exec_call',
value: normalized,
});
return normalized;
});
this.setExport('set_yield_time', (value) => {
const normalized = normalizeYieldTime(value);
parentPort.postMessage({
type: 'set_yield_time',
value: normalized,
});
return normalized;
});
this.setExport('store', store);
this.setExport('yield_control', () => {
parentPort.postMessage({ type: 'yield' });
});
this.setExport('image', helpers.image);
this.setExport('load', helpers.load);
this.setExport('output_text', helpers.output_text);
this.setExport('output_image', helpers.output_image);
this.setExport(
'set_max_output_tokens_per_exec_call',
helpers.set_max_output_tokens_per_exec_call
);
this.setExport('set_yield_time', helpers.set_yield_time);
this.setExport('store', helpers.store);
this.setExport('text', helpers.text);
this.setExport('yield_control', helpers.yield_control);
},
{ context }
);
}
// Assembles the frozen runtime object that the bootstrap script copies onto
// globalThis (ALL_TOOLS metadata, the `tools` namespace, and the shared
// helper functions). Property order is preserved so globalThis enumeration
// stays stable.
function createBridgeRuntime(callTool, enabledTools, helpers) {
  const {
    image,
    load,
    set_max_output_tokens_per_exec_call,
    set_yield_time,
    store,
    text,
    yield_control,
  } = helpers;
  return Object.freeze({
    ALL_TOOLS: createAllToolsMetadata(enabledTools),
    image,
    load,
    set_max_output_tokens_per_exec_call,
    set_yield_time,
    store,
    text,
    tools: createGlobalToolsNamespace(callTool, enabledTools),
    yield_control,
  });
}
function namespacesMatch(left, right) {
if (left.length !== right.length) {
return false;
@ -347,16 +399,18 @@ function codeModeWorkerMain() {
);
}
function createModuleResolver(context, callTool, enabledTools, state) {
const toolsModule = createToolsModule(context, callTool, enabledTools);
const codeModeModule = createCodeModeModule(context, state);
function createModuleResolver(context, callTool, enabledTools, helpers) {
let toolsModule;
let codeModeModule;
const namespacedModules = new Map();
return function resolveModule(specifier) {
if (specifier === 'tools.js') {
toolsModule ??= createToolsModule(context, callTool, enabledTools);
return toolsModule;
}
if (specifier === '@openai/code_mode' || specifier === 'openai/code_mode') {
codeModeModule ??= createCodeModeModule(context, helpers);
return codeModeModule;
}
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
@ -400,12 +454,12 @@ function codeModeWorkerMain() {
return module;
}
async function runModule(context, start, state, callTool) {
async function runModule(context, start, callTool, helpers) {
const resolveModule = createModuleResolver(
context,
callTool,
start.enabled_tools ?? [],
state
helpers
);
const mainModule = new SourceTextModule(start.source, {
context,
@ -425,12 +479,21 @@ function codeModeWorkerMain() {
storedValues: cloneJsonValue(start.stored_values ?? {}),
};
const callTool = createToolCaller();
const enabledTools = start.enabled_tools ?? [];
const contentItems = createContentItems();
const context = vm.createContext({
__codexContentItems: createContentItems(),
__codexContentItems: contentItems,
});
const helpers = createCodeModeHelpers(context, state);
Object.defineProperty(context, '__codexRuntime', {
value: createBridgeRuntime(callTool, enabledTools, helpers),
configurable: true,
enumerable: false,
writable: false,
});
try {
await runModule(context, start, state, callTool);
await runModule(context, start, callTool, helpers);
parentPort.postMessage({
type: 'result',
stored_values: state.storedValues,

View file

@ -74,15 +74,41 @@ fn append_code_mode_sample(
input_type: String,
output_type: String,
) -> String {
let reference = code_mode_tool_reference(tool_name);
let local_name = normalize_code_mode_identifier(&reference.tool_key);
let declaration = format!(
"import {{ {local_name} }} from \"{}\";\ndeclare function {local_name}({input_name}: {input_type}): Promise<{output_type}>;",
reference.module_path
"declare const tools: {{\n {}\n}};",
render_code_mode_tool_declaration(tool_name, input_name, input_type, output_type)
);
format!("{description}\n\nCode mode declaration:\n```ts\n{declaration}\n```")
}
/// Renders one tool entry for the `declare const tools: { ... }` TypeScript
/// block, e.g. `view_image(args: { path: string; }): Promise<unknown>;`.
///
/// Multiline input/output types are re-indented by two spaces so nested
/// object literals line up inside the surrounding declaration, and the tool
/// name is normalized into a valid JavaScript identifier.
fn render_code_mode_tool_declaration(
    tool_name: &str,
    input_name: &str,
    input_type: String,
    output_type: String,
) -> String {
    let identifier = normalize_code_mode_identifier(tool_name);
    let input = indent_multiline_type(&input_type, 2);
    let output = indent_multiline_type(&output_type, 2);
    format!("{identifier}({input_name}: {input}): Promise<{output}>;")
}
/// Prefixes every line after the first with `spaces` spaces, leaving the
/// first line untouched so the result can be embedded mid-line in an
/// already-indented declaration. Single-line input is returned unchanged
/// (modulo any trailing newline, which `str::lines` drops).
fn indent_multiline_type(type_name: &str, spaces: usize) -> String {
    let indent = " ".repeat(spaces);
    let mut lines = type_name.lines();
    let mut out = String::with_capacity(type_name.len());
    if let Some(first) = lines.next() {
        out.push_str(first);
    }
    for line in lines {
        out.push('\n');
        out.push_str(&indent);
        out.push_str(line);
    }
    out
}
pub(crate) fn normalize_code_mode_identifier(tool_key: &str) -> String {
let mut identifier = String::new();

View file

@ -76,7 +76,7 @@ fn render_json_schema_to_typescript_sorts_object_properties() {
}
#[test]
fn append_code_mode_sample_uses_static_import_for_valid_identifiers() {
fn append_code_mode_sample_uses_global_tools_for_valid_identifiers() {
assert_eq!(
append_code_mode_sample(
"desc",
@ -85,12 +85,12 @@ fn append_code_mode_sample_uses_static_import_for_valid_identifiers() {
"{ foo: string }".to_string(),
"unknown".to_string(),
),
"desc\n\nCode mode declaration:\n```ts\nimport { get_profile } from \"tools/mcp/ologs.js\";\ndeclare function get_profile(args: { foo: string }): Promise<unknown>;\n```"
"desc\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__ologs__get_profile(args: { foo: string }): Promise<unknown>;\n};\n```"
);
}
#[test]
fn append_code_mode_sample_normalizes_non_identifier_tool_names() {
fn append_code_mode_sample_normalizes_invalid_identifiers() {
assert_eq!(
append_code_mode_sample(
"desc",
@ -99,6 +99,6 @@ fn append_code_mode_sample_normalizes_non_identifier_tool_names() {
"{ foo: string }".to_string(),
"unknown".to_string(),
),
"desc\n\nCode mode declaration:\n```ts\nimport { echo_tool } from \"tools/mcp/rmcp.js\";\ndeclare function echo_tool(args: { foo: string }): Promise<unknown>;\n```"
"desc\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__rmcp__echo_tool(args: { foo: string }): Promise<unknown>;\n};\n```"
);
}

View file

@ -9,6 +9,7 @@ use tracing::Instrument;
use tracing::instrument;
use tracing::trace_span;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::error::CodexErr;
@ -46,6 +47,10 @@ impl ToolCallRuntime {
}
}
/// Looks up the spec for `tool_name` among the enabled tools by delegating
/// to the underlying router; returns `None` when no such tool is enabled.
pub(crate) fn find_spec(&self, tool_name: &str) -> Option<ToolSpec> {
self.router.find_spec(tool_name)
}
#[instrument(level = "trace", skip_all)]
pub(crate) fn handle_tool_call(
self,

View file

@ -75,6 +75,13 @@ impl ToolRouter {
.collect()
}
/// Returns a clone of the registered spec whose name matches `tool_name`,
/// or `None` when the tool is not registered with this router.
pub fn find_spec(&self, tool_name: &str) -> Option<ToolSpec> {
    for config in &self.specs {
        if config.spec.name() == tool_name {
            return Some(config.spec.clone());
        }
    }
    None
}
pub fn tool_supports_parallel(&self, tool_name: &str) -> bool {
self.specs
.iter()

View file

@ -2443,7 +2443,7 @@ fn code_mode_augments_builtin_tool_descriptions_with_typed_sample() {
assert_eq!(
description,
"View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nCode mode declaration:\n```ts\nimport { view_image } from \"tools.js\";\ndeclare function view_image(args: {\n path: string;\n}): Promise<unknown>;\n```"
"View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n view_image(args: {\n path: string;\n }): Promise<unknown>;\n};\n```"
);
}
@ -2495,7 +2495,7 @@ fn code_mode_augments_mcp_tool_descriptions_with_namespaced_sample() {
assert_eq!(
description,
"Echo text\n\nCode mode declaration:\n```ts\nimport { echo } from \"tools/mcp/sample.js\";\ndeclare function echo(args: {\n message: string;\n}): Promise<{\n _meta?: unknown;\n content: Array<unknown>;\n isError?: boolean;\n structuredContent?: unknown;\n}>;\n```"
"Echo text\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__sample__echo(args: {\n message: string;\n }): Promise<{\n _meta?: unknown;\n content: Array<unknown>;\n isError?: boolean;\n structuredContent?: unknown;\n }>;\n};\n```"
);
}

View file

@ -63,7 +63,7 @@ fn wait_for_file_source(path: &Path) -> Result<String> {
let quoted_path = shlex::try_join([path.to_string_lossy().as_ref()])?;
let command = format!("if [ -f {quoted_path} ]; then printf ready; fi");
Ok(format!(
r#"while ((await exec_command({{ cmd: {command:?} }})).output !== "ready") {{
r#"while ((await tools.exec_command({{ cmd: {command:?} }})).output !== "ready") {{
}}"#
))
}
@ -197,9 +197,7 @@ async fn code_mode_can_return_exec_command_output() -> Result<()> {
&server,
"use exec to run exec_command",
r#"
import { exec_command } from "tools.js";
add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_marker" })));
text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marker" })));
"#,
false,
)
@ -239,9 +237,29 @@ async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
import { test_sync_tool } from "tools.js";
let mut builder = test_codex()
.with_model("test-gpt-5.1-codex")
.with_config(move |config| {
let _ = config.features.enable(Feature::CodeMode);
});
let test = builder.build(&server).await?;
let warmup_code = r#"
const args = {
sleep_after_ms: 10,
barrier: {
id: "code-mode-parallel-tools-warmup",
participants: 2,
timeout_ms: 1_000,
},
};
await Promise.all([
tools.test_sync_tool(args),
tools.test_sync_tool(args),
]);
"#;
let code = r#"
const args = {
sleep_after_ms: 300,
barrier: {
@ -252,16 +270,42 @@ const args = {
};
const results = await Promise.all([
test_sync_tool(args),
test_sync_tool(args),
tools.test_sync_tool(args),
tools.test_sync_tool(args),
]);
add_content(JSON.stringify(results));
text(JSON.stringify(results));
"#;
let response_mock = responses::mount_sse_sequence(
&server,
vec![
sse(vec![
ev_response_created("resp-warm-1"),
ev_custom_tool_call("call-warm-1", "exec", warmup_code),
ev_completed("resp-warm-1"),
]),
sse(vec![
ev_assistant_message("msg-warm-1", "warmup done"),
ev_completed("resp-warm-2"),
]),
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call("call-1", "exec", code),
ev_completed("resp-1"),
]),
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
],
)
.await;
test.submit_turn("warm up nested tools in parallel").await?;
let start = Instant::now();
let (_test, second_mock) =
run_code_mode_turn(&server, "run nested tools in parallel", code, false).await?;
test.submit_turn("run nested tools in parallel").await?;
let duration = start.elapsed();
assert!(
@ -269,7 +313,9 @@ add_content(JSON.stringify(results));
"expected nested tools to finish in parallel, got {duration:?}",
);
let req = second_mock.single_request();
let req = response_mock
.last_request()
.expect("parallel code mode run should send a completion request");
let items = custom_tool_output_items(&req, "call-1");
assert_eq!(items.len(), 2);
assert_eq!(text_item(&items, 1), "[\"ok\",\"ok\"]");
@ -287,12 +333,9 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<
&server,
"use exec to truncate the final result",
r#"
import { exec_command } from "tools.js";
import { set_max_output_tokens_per_exec_call } from "@openai/code_mode";
set_max_output_tokens_per_exec_call(6);
add_content(JSON.stringify(await exec_command({
text(JSON.stringify(await tools.exec_command({
cmd: "printf 'token one token two token three token four token five token six token seven'",
max_output_tokens: 100
})));
@ -332,8 +375,8 @@ async fn code_mode_returns_accumulated_output_when_script_fails() -> Result<()>
&server,
"use code_mode to surface script failures",
r#"
add_content("before crash");
add_content("still before crash");
text("before crash");
text("still before crash");
throw new Error("boom");
"#,
false,
@ -383,15 +426,12 @@ async fn code_mode_can_yield_and_resume_with_exec_wait() -> Result<()> {
let code = format!(
r#"
import {{ output_text, set_yield_time }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("phase 1");
text("phase 1");
set_yield_time(10);
{phase_2_wait}
output_text("phase 2");
text("phase 2");
{phase_3_wait}
output_text("phase 3");
text("phase 3");
"#
);
@ -527,9 +567,7 @@ async fn code_mode_yield_timeout_works_for_busy_loop() -> Result<()> {
let test = builder.build(&server).await?;
let code = r#"
import { output_text, set_yield_time } from "@openai/code_mode";
output_text("phase 1");
text("phase 1");
set_yield_time(10);
while (true) {}
"#;
@ -629,24 +667,18 @@ async fn code_mode_can_run_multiple_yielded_sessions() -> Result<()> {
let session_a_code = format!(
r#"
import {{ output_text, set_yield_time }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("session a start");
text("session a start");
set_yield_time(10);
{session_a_wait}
output_text("session a done");
text("session a done");
"#
);
let session_b_code = format!(
r#"
import {{ output_text, set_yield_time }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("session b start");
text("session b start");
set_yield_time(10);
{session_b_wait}
output_text("session b done");
text("session b done");
"#
);
@ -801,13 +833,10 @@ async fn code_mode_exec_wait_can_terminate_and_continue() -> Result<()> {
let code = format!(
r#"
import {{ output_text, set_yield_time }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("phase 1");
text("phase 1");
set_yield_time(10);
{termination_wait}
output_text("phase 2");
text("phase 2");
"#
);
@ -883,9 +912,7 @@ output_text("phase 2");
"call-3",
"exec",
r#"
import { output_text } from "@openai/code_mode";
output_text("after terminate");
text("after terminate");
"#,
),
ev_completed("resp-5"),
@ -1000,25 +1027,19 @@ async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_
let session_a_code = format!(
r#"
import {{ output_text, set_yield_time }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("session a start");
text("session a start");
set_yield_time(10);
{session_a_wait}
output_text("session a done");
await exec_command({{ cmd: {session_a_done_command:?} }});
text("session a done");
await tools.exec_command({{ cmd: {session_a_done_command:?} }});
"#
);
let session_b_code = format!(
r#"
import {{ output_text, set_yield_time }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("session b start");
text("session b start");
set_yield_time(10);
{session_b_wait}
output_text("session b done");
text("session b done");
"#
);
@ -1197,13 +1218,10 @@ async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() ->
format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
let code = format!(
r#"
import {{ yield_control, output_text }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("before yield");
text("before yield");
yield_control();
await exec_command({{ cmd: {write_file_command:?} }});
output_text("after yield");
await tools.exec_command({{ cmd: {write_file_command:?} }});
text("after yield");
"#
);
@ -1291,14 +1309,11 @@ async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> {
let code = format!(
r#"
import {{ output_text, set_max_output_tokens_per_exec_call, set_yield_time }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("phase 1");
text("phase 1");
set_max_output_tokens_per_exec_call(100);
set_yield_time(10);
{completion_wait}
output_text("token one token two token three token four token five token six token seven");
text("token one token two token three token four token five token six token seven");
"#
);
@ -1380,7 +1395,7 @@ Total\ output\ lines:\ 1\n
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> Result<()> {
async fn code_mode_can_output_serialized_text_via_global_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
@ -1388,9 +1403,7 @@ async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> R
&server,
"use exec to return structured text",
r#"
import { output_text } from "@openai/code_mode";
output_text({ json: true });
text({ json: true });
"#,
false,
)
@ -1409,7 +1422,7 @@ output_text({ json: true });
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_surfaces_output_text_stringify_errors() -> Result<()> {
async fn code_mode_surfaces_text_stringify_errors() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
@ -1417,11 +1430,9 @@ async fn code_mode_surfaces_output_text_stringify_errors() -> Result<()> {
&server,
"use exec to return circular text",
r#"
import { output_text } from "@openai/code_mode";
const circular = {};
circular.self = circular;
output_text(circular);
text(circular);
"#,
false,
)
@ -1452,7 +1463,7 @@ output_text(circular);
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_output_images_via_openai_code_mode_module() -> Result<()> {
async fn code_mode_can_output_images_via_global_helper() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
@ -1460,10 +1471,8 @@ async fn code_mode_can_output_images_via_openai_code_mode_module() -> Result<()>
&server,
"use exec to return images",
r#"
import { output_image } from "@openai/code_mode";
output_image("https://example.com/image.jpg");
output_image("data:image/png;base64,AAA");
image("https://example.com/image.jpg");
image("data:image/png;base64,AAA");
"#,
false,
)
@ -1512,9 +1521,7 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
let patch = format!(
"*** Begin Patch\n*** Add File: {file_name}\n+hello from code_mode\n*** End Patch\n"
);
let code = format!(
"import {{ apply_patch }} from \"tools.js\";\nconst items = await apply_patch({patch:?});\nadd_content(items);\n"
);
let code = format!("text(await tools.apply_patch({patch:?}));\n");
let (test, second_mock) =
run_code_mode_turn(&server, "use exec to run apply_patch", &code, true).await?;
@ -1550,12 +1557,10 @@ async fn code_mode_can_print_structured_mcp_tool_result_fields() -> Result<()> {
let server = responses::start_mock_server().await;
let code = r#"
import { echo } from "tools/mcp/rmcp.js";
const { content, structuredContent, isError } = await echo({
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({
message: "ping",
});
add_content(
text(
`echo=${structuredContent?.echo ?? "missing"}\n` +
`env=${structuredContent?.env ?? "missing"}\n` +
`isError=${String(isError)}\n` +
@ -1585,37 +1590,33 @@ contentLength=0"
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_can_dynamically_import_namespaced_mcp_tools() -> Result<()> {
async fn code_mode_exposes_mcp_tools_on_global_tools_object() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
const rmcp = await import("tools/mcp/rmcp.js");
const { content, structuredContent, isError } = await rmcp.echo({
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({
message: "ping",
});
add_content(
`hasEcho=${String(Object.keys(rmcp).includes("echo"))}\n` +
`echoType=${typeof rmcp.echo}\n` +
text(
`hasEcho=${String(Object.keys(tools).includes("mcp__rmcp__echo"))}\n` +
`echoType=${typeof tools.mcp__rmcp__echo}\n` +
`echo=${structuredContent?.echo ?? "missing"}\n` +
`isError=${String(isError)}\n` +
`contentLength=${content.length}`
);
"#;
let (_test, second_mock) = run_code_mode_turn_with_rmcp(
&server,
"use exec to dynamically import the rmcp module",
code,
)
.await?;
let (_test, second_mock) =
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code)
.await?;
let req = second_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
assert_ne!(
success,
Some(false),
"exec dynamic rmcp import failed unexpectedly: {output}"
"exec global rmcp access failed unexpectedly: {output}"
);
assert_eq!(
output,
@ -1630,20 +1631,18 @@ contentLength=0"
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_normalizes_illegal_namespaced_mcp_tool_identifiers() -> Result<()> {
async fn code_mode_exposes_normalized_illegal_mcp_tool_names() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
let code = r#"
import { echo_tool } from "tools/mcp/rmcp.js";
const result = await echo_tool({ message: "ping" });
add_content(`echo=${result.structuredContent.echo}`);
const result = await tools.mcp__rmcp__echo_tool({ message: "ping" });
text(`echo=${result.structuredContent.echo}`);
"#;
let (_test, second_mock) = run_code_mode_turn_with_rmcp(
&server,
"use exec to import a normalized rmcp tool name",
"use exec to call a normalized rmcp tool name",
code,
)
.await?;
@ -1653,7 +1652,7 @@ add_content(`echo=${result.structuredContent.echo}`);
assert_ne!(
success,
Some(false),
"exec normalized rmcp import failed unexpectedly: {output}"
"exec normalized rmcp tool call failed unexpectedly: {output}"
);
assert_eq!(output, "echo=ECHOING: ping");
@ -1666,7 +1665,7 @@ async fn code_mode_lists_global_scope_items() -> Result<()> {
let server = responses::start_mock_server().await;
let code = r#"
add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
"#;
let (_test, second_mock) =
@ -1683,6 +1682,7 @@ add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
let globals = globals.into_iter().collect::<HashSet<_>>();
let expected = [
"AggregateError",
"ALL_TOOLS",
"Array",
"ArrayBuffer",
"AsyncDisposableStack",
@ -1736,7 +1736,6 @@ add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
"WeakSet",
"WebAssembly",
"__codexContentItems",
"add_content",
"console",
"decodeURI",
"decodeURIComponent",
@ -1745,12 +1744,20 @@ add_content(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
"escape",
"eval",
"globalThis",
"image",
"isFinite",
"isNaN",
"load",
"parseFloat",
"parseInt",
"set_max_output_tokens_per_exec_call",
"set_yield_time",
"store",
"text",
"tools",
"undefined",
"unescape",
"yield_control",
];
for g in &globals {
assert!(
@ -1768,10 +1775,8 @@ async fn code_mode_exports_all_tools_metadata_for_builtin_tools() -> Result<()>
let server = responses::start_mock_server().await;
let code = r#"
import { ALL_TOOLS } from "tools.js";
const tool = ALL_TOOLS.find(({ module, name }) => module === "tools.js" && name === "view_image");
add_content(JSON.stringify(tool));
const tool = ALL_TOOLS.find(({ name }) => name === "view_image");
text(JSON.stringify(tool));
"#;
let (_test, second_mock) =
@ -1789,9 +1794,8 @@ add_content(JSON.stringify(tool));
assert_eq!(
parsed,
serde_json::json!({
"module": "tools.js",
"name": "view_image",
"description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nCode mode declaration:\n```ts\nimport { view_image } from \"tools.js\";\ndeclare function view_image(args: {\n path: string;\n}): Promise<unknown>;\n```",
"description": "View a local image from the filesystem (only use if given a full filepath by the user, and the image isn't already attached to the thread context within <image ...> tags).\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n view_image(args: {\n path: string;\n }): Promise<unknown>;\n};\n```",
})
);
@ -1804,12 +1808,10 @@ async fn code_mode_exports_all_tools_metadata_for_namespaced_mcp_tools() -> Resu
let server = responses::start_mock_server().await;
let code = r#"
import { ALL_TOOLS } from "tools.js";
const tool = ALL_TOOLS.find(
({ module, name }) => module === "tools/mcp/rmcp.js" && name === "echo"
({ name }) => name === "mcp__rmcp__echo"
);
add_content(JSON.stringify(tool));
text(JSON.stringify(tool));
"#;
let (_test, second_mock) =
@ -1827,9 +1829,8 @@ add_content(JSON.stringify(tool));
assert_eq!(
parsed,
serde_json::json!({
"module": "tools/mcp/rmcp.js",
"name": "echo",
"description": "Echo back the provided message and include environment data.\n\nCode mode declaration:\n```ts\nimport { echo } from \"tools/mcp/rmcp.js\";\ndeclare function echo(args: {\n env_var?: string;\n message: string;\n}): Promise<{\n _meta?: unknown;\n content: Array<unknown>;\n isError?: boolean;\n structuredContent?: unknown;\n}>;\n```",
"name": "mcp__rmcp__echo",
"description": "Echo back the provided message and include environment data.\n\nCode mode declaration:\n```ts\ndeclare const tools: {\n mcp__rmcp__echo(args: {\n env_var?: string;\n message: string;\n }): Promise<{\n _meta?: unknown;\n content: Array<unknown>;\n isError?: boolean;\n structuredContent?: unknown;\n }>;\n};\n```",
})
);
@ -1842,13 +1843,11 @@ async fn code_mode_can_print_content_only_mcp_tool_result_fields() -> Result<()>
let server = responses::start_mock_server().await;
let code = r#"
import { image_scenario } from "tools/mcp/rmcp.js";
const { content, structuredContent, isError } = await image_scenario({
const { content, structuredContent, isError } = await tools.mcp__rmcp__image_scenario({
scenario: "text_only",
caption: "caption from mcp",
});
add_content(
text(
`firstType=${content[0]?.type ?? "missing"}\n` +
`firstText=${content[0]?.text ?? "missing"}\n` +
`structuredContent=${String(structuredContent ?? null)}\n` +
@ -1887,13 +1886,11 @@ async fn code_mode_can_print_error_mcp_tool_result_fields() -> Result<()> {
let server = responses::start_mock_server().await;
let code = r#"
import { echo } from "tools/mcp/rmcp.js";
const { content, structuredContent, isError } = await echo({});
const { content, structuredContent, isError } = await tools.mcp__rmcp__echo({});
const firstText = content[0]?.text ?? "";
const mentionsMissingMessage =
firstText.includes("missing field") && firstText.includes("message");
add_content(
text(
`isError=${String(isError)}\n` +
`contentLength=${content.length}\n` +
`mentionsMissingMessage=${String(mentionsMissingMessage)}\n` +
@ -1939,10 +1936,8 @@ async fn code_mode_can_store_and_load_values_across_turns() -> Result<()> {
"call-1",
"exec",
r#"
import { store } from "@openai/code_mode";
store("nb", { title: "Notebook", items: [1, true, null] });
add_content("stored");
text("stored");
"#,
),
ev_completed("resp-1"),
@ -1978,9 +1973,7 @@ add_content("stored");
"call-2",
"exec",
r#"
import { load } from "openai/code_mode";
add_content(JSON.stringify(load("nb")));
text(JSON.stringify(load("nb")));
"#,
),
ev_completed("resp-3"),