From da616136ccff31142b159e97da67705bf0ab7555 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Mon, 9 Mar 2026 21:56:27 -0600 Subject: [PATCH] Add code_mode experimental feature (#13418) A much narrower and more isolated (no node features) version of js_repl --- codex-rs/core/config.schema.json | 6 + codex-rs/core/src/codex.rs | 10 + codex-rs/core/src/features.rs | 8 + codex-rs/core/src/project_doc.rs | 8 + codex-rs/core/src/tools/code_mode.rs | 442 ++++++++++++++++++ codex-rs/core/src/tools/code_mode_bridge.js | 98 ++++ codex-rs/core/src/tools/code_mode_runner.cjs | 137 ++++++ codex-rs/core/src/tools/context.rs | 1 + codex-rs/core/src/tools/handlers/code_mode.rs | 55 +++ codex-rs/core/src/tools/handlers/mod.rs | 2 + codex-rs/core/src/tools/mod.rs | 1 + codex-rs/core/src/tools/spec.rs | 57 +++ codex-rs/core/tests/suite/code_mode.rs | 132 ++++++ codex-rs/core/tests/suite/mod.rs | 1 + 14 files changed, 958 insertions(+) create mode 100644 codex-rs/core/src/tools/code_mode.rs create mode 100644 codex-rs/core/src/tools/code_mode_bridge.js create mode 100644 codex-rs/core/src/tools/code_mode_runner.cjs create mode 100644 codex-rs/core/src/tools/handlers/code_mode.rs create mode 100644 codex-rs/core/tests/suite/code_mode.rs diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index a1ca57d7a..49e3fc944 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -326,6 +326,9 @@ "child_agents_md": { "type": "boolean" }, + "code_mode": { + "type": "boolean" + }, "codex_git_commit": { "type": "boolean" }, @@ -1806,6 +1809,9 @@ "child_agents_md": { "type": "boolean" }, + "code_mode": { + "type": "boolean" + }, "codex_git_commit": { "type": "boolean" }, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index b30b93cc1..6d3c53f83 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -405,6 +405,16 @@ impl Codex { warn!("{message}"); config.startup_warnings.push(message); } + if config.features.enabled(Feature::CodeMode) + && let Err(err) = resolve_compatible_node(config.js_repl_node_path.as_deref()).await + { + let message = format!( + "Disabled `code_mode` for this session because the configured Node runtime is unavailable or incompatible. {err}" + ); + warn!("{message}"); + let _ = config.features.disable(Feature::CodeMode); + config.startup_warnings.push(message); + } let allowed_skills_for_implicit_invocation = loaded_skills.allowed_skills_for_implicit_invocation(); diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 529203e1c..af8326bab 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -83,6 +83,8 @@ pub enum Feature { // Experimental /// Enable JavaScript REPL tools backed by a persistent Node kernel. JsRepl, + /// Enable a minimal JavaScript mode backed by Node's built-in vm runtime. + CodeMode, /// Only expose js_repl tools directly to the model. JsReplToolsOnly, /// Use the single unified PTY-backed exec tool. @@ -510,6 +512,12 @@ pub const FEATURES: &[FeatureSpec] = &[ }, default_enabled: false, }, + FeatureSpec { + id: Feature::CodeMode, + key: "code_mode", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::JsReplToolsOnly, key: "js_repl_tools_only", diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index 6253d83fb..0ef10535f 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -25,6 +25,7 @@ use crate::plugins::PluginCapabilitySummary; use crate::plugins::render_plugins_section; use crate::skills::SkillMetadata; use crate::skills::render_skills_section; +use crate::tools::code_mode; use codex_app_server_protocol::ConfigLayerSource; use dunce::canonicalize as normalize_path; use std::path::PathBuf; @@ -118,6 +119,13 @@ pub(crate) async fn get_user_instructions( output.push_str(&plugin_section); } + if let Some(code_mode_section) = code_mode::instructions(config) { + if !output.is_empty() { + output.push_str("\n\n"); + } + output.push_str(&code_mode_section); + } + let skills_section = skills.and_then(render_skills_section); if let Some(skills_section) = skills_section { if !output.is_empty() { diff --git a/codex-rs/core/src/tools/code_mode.rs b/codex-rs/core/src/tools/code_mode.rs new file mode 100644 index 000000000..6203763e9 --- /dev/null +++ b/codex-rs/core/src/tools/code_mode.rs @@ -0,0 +1,442 @@ +use std::process::ExitStatus; +use std::sync::Arc; + +use crate::client_common::tools::ToolSpec; +use crate::codex::Session; +use crate::codex::TurnContext; +use crate::config::Config; +use crate::exec_env::create_env; +use crate::features::Feature; +use crate::function_tool::FunctionCallError; +use crate::tools::ToolRouter; +use crate::tools::context::SharedTurnDiffTracker; +use crate::tools::context::ToolPayload; +use crate::tools::js_repl::resolve_compatible_node; +use crate::tools::router::ToolCall; +use crate::tools::router::ToolCallSource; +use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputBody; +use codex_protocol::models::FunctionCallOutputContentItem; +use codex_protocol::models::FunctionCallOutputPayload; +use codex_protocol::models::ResponseInputItem; +use serde::Deserialize; +use serde::Serialize; +use serde_json::Value as JsonValue; +use serde_json::json; +use tokio::io::AsyncBufReadExt; +use tokio::io::AsyncReadExt; +use tokio::io::AsyncWriteExt; +use tokio::io::BufReader; + +const CODE_MODE_RUNNER_SOURCE: &str = include_str!("code_mode_runner.cjs"); +const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("code_mode_bridge.js"); + +#[derive(Clone)] +struct ExecContext { + session: Arc, + turn: Arc, + tracker: SharedTurnDiffTracker, +} + +#[derive(Clone, Copy, Debug, Deserialize, Eq, PartialEq, Serialize)] +#[serde(rename_all = "snake_case")] +enum CodeModeToolKind { + Function, + Freeform, +} + +#[derive(Clone, Debug, Serialize)] +struct EnabledTool { + name: String, + kind: CodeModeToolKind, +} + +#[derive(Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum HostToNodeMessage { + Init { + source: String, + }, + Response { + id: String, + content_items: Vec, + }, +} + +#[derive(Deserialize)] +#[serde(tag = "type", rename_all = "snake_case")] +enum NodeToHostMessage { + ToolCall { + id: String, + name: String, + input: String, + }, + Result { + content_items: Vec, + }, +} + +pub(crate) fn instructions(config: &Config) -> Option { + if !config.features.enabled(Feature::CodeMode) { + return None; + } + + let mut section = String::from("## Code Mode\n"); + section.push_str( + "- Use `code_mode` for JavaScript execution in a Node-backed `node:vm` context.\n", + ); + section.push_str("- `code_mode` is a freeform/custom tool. Direct `code_mode` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n"); + section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n"); + section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n"); + section.push_str("- Call nested tools with `await tools[name](args)` or identifier wrappers like `await exec_command(args)` when the tool name is a valid JavaScript identifier. Nested tool calls resolve to arrays of content items.\n"); + section.push_str( + "- Function tools require JSON object arguments. Freeform tools require raw strings.\n", + ); + section.push_str("- `add_content(value)` is synchronous. It accepts a content item or an array of content items, so `add_content(await exec_command(...))` returns the same content items a direct tool call would expose to the model.\n"); + section + .push_str("- Only content passed to `add_content(value)` is surfaced back to the model."); + Some(section) +} + +pub(crate) async fn execute( + session: Arc, + turn: Arc, + tracker: SharedTurnDiffTracker, + code: String, +) -> Result, FunctionCallError> { + let exec = ExecContext { + session, + turn, + tracker, + }; + let enabled_tools = build_enabled_tools(&exec); + let source = build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?; + execute_node(exec, source) + .await + .map_err(FunctionCallError::RespondToModel) +} + +async fn execute_node( + exec: ExecContext, + source: String, +) -> Result, String> { + let node_path = resolve_compatible_node(exec.turn.config.js_repl_node_path.as_deref()).await?; + + let env = create_env(&exec.turn.shell_environment_policy, None); + let mut cmd = tokio::process::Command::new(&node_path); + cmd.arg("--eval"); + cmd.arg(CODE_MODE_RUNNER_SOURCE); + cmd.current_dir(&exec.turn.cwd); + cmd.env_clear(); + cmd.envs(env); + cmd.stdin(std::process::Stdio::piped()) + .stdout(std::process::Stdio::piped()) + .stderr(std::process::Stdio::piped()) + .kill_on_drop(true); + + let mut child = cmd + .spawn() + .map_err(|err| format!("failed to start code_mode Node runtime: {err}"))?; + let stdout = child + .stdout + .take() + .ok_or_else(|| "code_mode runner missing stdout".to_string())?; + let stderr = child + .stderr + .take() + .ok_or_else(|| "code_mode runner missing stderr".to_string())?; + let mut stdin = child + .stdin + .take() + .ok_or_else(|| "code_mode runner missing stdin".to_string())?; + + let stderr_task = tokio::spawn(async move { + let mut reader = BufReader::new(stderr); + let mut buf = Vec::new(); + let _ = reader.read_to_end(&mut buf).await; + String::from_utf8_lossy(&buf).trim().to_string() + }); + + write_message(&mut stdin, &HostToNodeMessage::Init { source }).await?; + + let mut stdout_lines = BufReader::new(stdout).lines(); + let mut final_content_items = None; + while let Some(line) = stdout_lines + .next_line() + .await + .map_err(|err| format!("failed to read code_mode runner stdout: {err}"))? + { + if line.trim().is_empty() { + continue; + } + let message: NodeToHostMessage = serde_json::from_str(&line) + .map_err(|err| format!("invalid code_mode runner message: {err}; line={line}"))?; + match message { + NodeToHostMessage::ToolCall { id, name, input } => { + let response = HostToNodeMessage::Response { + id, + content_items: call_nested_tool(exec.clone(), name, input).await, + }; + write_message(&mut stdin, &response).await?; + } + NodeToHostMessage::Result { content_items } => { + final_content_items = Some(output_content_items_from_json_values(content_items)?); + break; + } + } + } + + drop(stdin); + + let status = child + .wait() + .await + .map_err(|err| format!("failed to wait for code_mode runner: {err}"))?; + let stderr = stderr_task + .await + .map_err(|err| format!("failed to collect code_mode stderr: {err}"))?; + + match final_content_items { + Some(content_items) if status.success() => Ok(content_items), + Some(_) => Err(format_runner_failure( + "code_mode execution failed", + status, + &stderr, + )), + None => Err(format_runner_failure( + "code_mode runner exited without returning a result", + status, + &stderr, + )), + } +} + +async fn write_message( + stdin: &mut tokio::process::ChildStdin, + message: &HostToNodeMessage, +) -> Result<(), String> { + let line = serde_json::to_string(message) + .map_err(|err| format!("failed to serialize code_mode message: {err}"))?; + stdin + .write_all(line.as_bytes()) + .await + .map_err(|err| format!("failed to write code_mode message: {err}"))?; + stdin + .write_all(b"\n") + .await + .map_err(|err| format!("failed to write code_mode message newline: {err}"))?; + stdin + .flush() + .await + .map_err(|err| format!("failed to flush code_mode message: {err}")) +} + +fn append_stderr(message: String, stderr: &str) -> String { + if stderr.trim().is_empty() { + return message; + } + format!("{message}\n\nnode stderr:\n{stderr}") +} + +fn format_runner_failure(message: &str, status: ExitStatus, stderr: &str) -> String { + append_stderr(format!("{message} (status {status})"), stderr) +} + +fn build_source(user_code: &str, enabled_tools: &[EnabledTool]) -> Result { + let enabled_tools_json = serde_json::to_string(enabled_tools) + .map_err(|err| format!("failed to serialize enabled tools: {err}"))?; + Ok(CODE_MODE_BRIDGE_SOURCE + .replace( + "__CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__", + &enabled_tools_json, + ) + .replace("__CODE_MODE_USER_CODE_PLACEHOLDER__", user_code)) +} + +fn build_enabled_tools(exec: &ExecContext) -> Vec { + let nested_tools_config = exec.turn.tools_config.for_code_mode_nested_tools(); + let router = ToolRouter::from_config( + &nested_tools_config, + None, + None, + exec.turn.dynamic_tools.as_slice(), + ); + let mut out = router + .specs() + .into_iter() + .map(|spec| EnabledTool { + name: spec.name().to_string(), + kind: tool_kind_for_spec(&spec), + }) + .filter(|tool| tool.name != "code_mode") + .collect::>(); + out.sort_by(|left, right| left.name.cmp(&right.name)); + out.dedup_by(|left, right| left.name == right.name); + out +} + +async fn call_nested_tool(exec: ExecContext, tool_name: String, input: String) -> Vec { + if tool_name == "code_mode" { + return error_content_items_json("code_mode cannot invoke itself".to_string()); + } + + let nested_config = exec.turn.tools_config.for_code_mode_nested_tools(); + let router = ToolRouter::from_config( + &nested_config, + None, + None, + exec.turn.dynamic_tools.as_slice(), + ); + + let specs = router.specs(); + let payload = match build_nested_tool_payload(&specs, &tool_name, input) { + Ok(payload) => payload, + Err(error) => return error_content_items_json(error), + }; + + let call = ToolCall { + tool_name: tool_name.clone(), + call_id: format!("code_mode-{}", uuid::Uuid::new_v4()), + payload, + }; + let response = router + .dispatch_tool_call( + Arc::clone(&exec.session), + Arc::clone(&exec.turn), + Arc::clone(&exec.tracker), + call, + ToolCallSource::CodeMode, + ) + .await; + + match response { + Ok(response) => { + json_values_from_output_content_items(content_items_from_response_input(response)) + } + Err(error) => error_content_items_json(error.to_string()), + } +} + +fn tool_kind_for_spec(spec: &ToolSpec) -> CodeModeToolKind { + if matches!(spec, ToolSpec::Freeform(_)) { + CodeModeToolKind::Freeform + } else { + CodeModeToolKind::Function + } +} + +fn tool_kind_for_name(specs: &[ToolSpec], tool_name: &str) -> Result { + specs + .iter() + .find(|spec| spec.name() == tool_name) + .map(tool_kind_for_spec) + .ok_or_else(|| format!("tool `{tool_name}` is not enabled in code_mode")) +} + +fn build_nested_tool_payload( + specs: &[ToolSpec], + tool_name: &str, + input: String, +) -> Result { + let actual_kind = tool_kind_for_name(specs, tool_name)?; + match actual_kind { + CodeModeToolKind::Function => { + validate_function_arguments(tool_name, &input)?; + Ok(ToolPayload::Function { arguments: input }) + } + CodeModeToolKind::Freeform => Ok(ToolPayload::Custom { input }), + } +} + +fn validate_function_arguments(tool_name: &str, input: &str) -> Result<(), String> { + let value: JsonValue = serde_json::from_str(input) + .map_err(|err| format!("tool `{tool_name}` expects a JSON object for arguments: {err}"))?; + if value.is_object() { + Ok(()) + } else { + Err(format!( + "tool `{tool_name}` expects a JSON object for arguments" + )) + } +} + +fn content_items_from_response_input( + response: ResponseInputItem, +) -> Vec { + match response { + ResponseInputItem::Message { content, .. } => content + .into_iter() + .map(function_output_content_item_from_content_item) + .collect(), + ResponseInputItem::FunctionCallOutput { output, .. } => { + content_items_from_function_output(output) + } + ResponseInputItem::CustomToolCallOutput { output, .. } => { + content_items_from_function_output(output) + } + ResponseInputItem::McpToolCallOutput { result, .. } => match result { + Ok(result) => { + content_items_from_function_output(FunctionCallOutputPayload::from(&result)) + } + Err(error) => vec![FunctionCallOutputContentItem::InputText { text: error }], + }, + } +} + +fn content_items_from_function_output( + output: FunctionCallOutputPayload, +) -> Vec { + match output.body { + FunctionCallOutputBody::Text(text) => { + vec![FunctionCallOutputContentItem::InputText { text }] + } + FunctionCallOutputBody::ContentItems(items) => items, + } +} + +fn function_output_content_item_from_content_item( + item: ContentItem, +) -> FunctionCallOutputContentItem { + match item { + ContentItem::InputText { text } | ContentItem::OutputText { text } => { + FunctionCallOutputContentItem::InputText { text } + } + ContentItem::InputImage { image_url } => FunctionCallOutputContentItem::InputImage { + image_url, + detail: None, + }, + } +} + +fn json_values_from_output_content_items( + content_items: Vec, +) -> Vec { + content_items + .into_iter() + .map(|item| match item { + FunctionCallOutputContentItem::InputText { text } => { + json!({ "type": "input_text", "text": text }) + } + FunctionCallOutputContentItem::InputImage { image_url, detail } => { + json!({ "type": "input_image", "image_url": image_url, "detail": detail }) + } + }) + .collect() +} + +fn output_content_items_from_json_values( + content_items: Vec, +) -> Result, String> { + content_items + .into_iter() + .enumerate() + .map(|(index, item)| { + serde_json::from_value(item) + .map_err(|err| format!("invalid code_mode content item at index {index}: {err}")) + }) + .collect() +} + +fn error_content_items_json(message: String) -> Vec { + vec![json!({ "type": "input_text", "text": message })] +} diff --git a/codex-rs/core/src/tools/code_mode_bridge.js b/codex-rs/core/src/tools/code_mode_bridge.js new file mode 100644 index 000000000..406d2f35b --- /dev/null +++ b/codex-rs/core/src/tools/code_mode_bridge.js @@ -0,0 +1,98 @@ +(async () => { +const __codexEnabledTools = __CODE_MODE_ENABLED_TOOLS_PLACEHOLDER__; +const __codexEnabledToolNames = __codexEnabledTools.map((tool) => tool.name); +const __codexToolKinds = new Map(__codexEnabledTools.map((tool) => [tool.name, tool.kind])); +const __codexContentItems = []; + +function __codexCloneContentItem(item) { + if (!item || typeof item !== 'object') { + throw new TypeError('content item must be an object'); + } + switch (item.type) { + case 'input_text': + if (typeof item.text !== 'string') { + throw new TypeError('content item "input_text" requires a string text field'); + } + return { type: 'input_text', text: item.text }; + case 'input_image': + if (typeof item.image_url !== 'string') { + throw new TypeError('content item "input_image" requires a string image_url field'); + } + return { type: 'input_image', image_url: item.image_url }; + default: + throw new TypeError(`unsupported content item type "${item.type}"`); + } +} + +function __codexNormalizeContentItems(value) { + if (Array.isArray(value)) { + return value.flatMap((entry) => __codexNormalizeContentItems(entry)); + } + return [__codexCloneContentItem(value)]; +} + +async function __codexCallTool(name, args) { + const toolKind = __codexToolKinds.get(name); + if (toolKind === undefined) { + throw new Error(`Tool "${name}" is not enabled in code_mode`); + } + if (toolKind === 'freeform') { + if (typeof args !== 'string') { + throw new TypeError(`Tool "${name}" expects a string input`); + } + return await __codex_tool_call(name, args); + } + if (args === undefined) { + return await __codex_tool_call(name, '{}'); + } + if (!args || typeof args !== 'object' || Array.isArray(args)) { + throw new TypeError(`Tool "${name}" expects a JSON object for arguments`); + } + return await __codex_tool_call(name, JSON.stringify(args)); +} + +Object.defineProperty(globalThis, '__codexContentItems', { + value: __codexContentItems, + configurable: true, + enumerable: false, + writable: false, +}); + +globalThis.codex = { + enabledTools: Object.freeze(__codexEnabledToolNames.slice()), +}; + +globalThis.add_content = (value) => { + const contentItems = __codexNormalizeContentItems(value); + __codexContentItems.push(...contentItems); + return contentItems; +}; + +globalThis.tools = new Proxy(Object.create(null), { + get(_target, prop) { + const name = String(prop); + return async (args) => __codexCallTool(name, args); + }, +}); + +globalThis.console = Object.freeze({ + log() {}, + info() {}, + warn() {}, + error() {}, + debug() {}, +}); + +for (const name of __codexEnabledToolNames) { + if (/^[A-Za-z_$][0-9A-Za-z_$]*$/.test(name) && !(name in globalThis)) { + Object.defineProperty(globalThis, name, { + value: async (args) => __codexCallTool(name, args), + configurable: true, + enumerable: false, + writable: false, + }); + } +} + +__CODE_MODE_USER_CODE_PLACEHOLDER__ +})(); diff --git a/codex-rs/core/src/tools/code_mode_runner.cjs b/codex-rs/core/src/tools/code_mode_runner.cjs new file mode 100644 index 000000000..ca9fcac67 --- /dev/null +++ b/codex-rs/core/src/tools/code_mode_runner.cjs @@ -0,0 +1,137 @@ +'use strict'; + +const readline = require('node:readline'); +const vm = require('node:vm'); + +function createProtocol() { + const rl = readline.createInterface({ + input: process.stdin, + crlfDelay: Infinity, + }); + + let nextId = 0; + const pending = new Map(); + let initResolve; + let initReject; + const init = new Promise((resolve, reject) => { + initResolve = resolve; + initReject = reject; + }); + + rl.on('line', (line) => { + if (!line.trim()) { + return; + } + + let message; + try { + message = JSON.parse(line); + } catch (error) { + initReject(error); + return; + } + + if (message.type === 'init') { + initResolve(message); + return; + } + + if (message.type === 'response') { + const entry = pending.get(message.id); + if (!entry) { + return; + } + pending.delete(message.id); + entry.resolve(Array.isArray(message.content_items) ? message.content_items : []); + return; + } + + initReject(new Error(`Unknown protocol message type: ${message.type}`)); + }); + + rl.on('close', () => { + const error = new Error('stdin closed'); + initReject(error); + for (const entry of pending.values()) { + entry.reject(error); + } + pending.clear(); + }); + + function send(message) { + return new Promise((resolve, reject) => { + process.stdout.write(`${JSON.stringify(message)}\n`, (error) => { + if (error) { + reject(error); + } else { + resolve(); + } + }); + }); + } + + function request(type, payload) { + const id = `msg-${++nextId}`; + return new Promise((resolve, reject) => { + pending.set(id, { resolve, reject }); + void send({ type, id, ...payload }).catch((error) => { + pending.delete(id); + reject(error); + }); + }); + } + + return { init, request, send }; +} + +function readContentItems(context) { + try { + const serialized = vm.runInContext( + 'JSON.stringify(globalThis.__codexContentItems ?? [])', + context + ); + const contentItems = JSON.parse(serialized); + return Array.isArray(contentItems) ? contentItems : []; + } catch { + return []; + } +} + +async function main() { + const protocol = createProtocol(); + const request = await protocol.init; + const context = vm.createContext({ + __codex_tool_call: async (name, input) => + protocol.request('tool_call', { + name: String(name), + input, + }), + }); + + try { + await vm.runInContext(request.source, context, { + displayErrors: true, + microtaskMode: 'afterEvaluate', + }); + await protocol.send({ + type: 'result', + content_items: readContentItems(context), + }); + process.exit(0); + } catch (error) { + process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`); + await protocol.send({ + type: 'result', + content_items: readContentItems(context), + }); + process.exit(1); + } +} + +void main().catch(async (error) => { + try { + process.stderr.write(`${String(error && error.stack ? error.stack : error)}\n`); + } finally { + process.exitCode = 1; + } +}); diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index deab5cc0e..0bfebfa64 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -22,6 +22,7 @@ pub type SharedTurnDiffTracker = Arc>; pub enum ToolCallSource { Direct, JsRepl, + CodeMode, } #[derive(Clone)] diff --git a/codex-rs/core/src/tools/handlers/code_mode.rs b/codex-rs/core/src/tools/handlers/code_mode.rs new file mode 100644 index 000000000..c4994d059 --- /dev/null +++ b/codex-rs/core/src/tools/handlers/code_mode.rs @@ -0,0 +1,55 @@ +use async_trait::async_trait; + +use crate::features::Feature; +use crate::function_tool::FunctionCallError; +use crate::tools::code_mode; +use crate::tools::context::ContentToolOutput; +use crate::tools::context::ToolInvocation; +use crate::tools::context::ToolOutputBox; +use crate::tools::context::ToolPayload; +use crate::tools::registry::ToolHandler; +use crate::tools::registry::ToolKind; + +pub struct CodeModeHandler; + +#[async_trait] +impl ToolHandler for CodeModeHandler { + fn kind(&self) -> ToolKind { + ToolKind::Function + } + + fn matches_kind(&self, payload: &ToolPayload) -> bool { + matches!(payload, ToolPayload::Custom { .. }) + } + + async fn handle(&self, invocation: ToolInvocation) -> Result { + let ToolInvocation { + session, + turn, + tracker, + payload, + .. + } = invocation; + + if !session.features().enabled(Feature::CodeMode) { + return Err(FunctionCallError::RespondToModel( + "code_mode is disabled by feature flag".to_string(), + )); + } + + let code = match payload { + ToolPayload::Custom { input } => input, + _ => { + return Err(FunctionCallError::RespondToModel( + "code_mode expects raw JavaScript source text".to_string(), + )); + } + }; + + let content_items = code_mode::execute(session, turn, tracker, code).await?; + Ok(Box::new(ContentToolOutput { + content: content_items, + success: Some(true), + })) + } +} diff --git a/codex-rs/core/src/tools/handlers/mod.rs b/codex-rs/core/src/tools/handlers/mod.rs index c1452e3ff..38d0f74f4 100644 --- a/codex-rs/core/src/tools/handlers/mod.rs +++ b/codex-rs/core/src/tools/handlers/mod.rs @@ -1,6 +1,7 @@ pub(crate) mod agent_jobs; pub mod apply_patch; mod artifacts; +mod code_mode; mod dynamic; mod grep_files; mod js_repl; @@ -32,6 +33,7 @@ use crate::sandboxing::merge_permission_profiles; use crate::sandboxing::normalize_additional_permissions; pub use apply_patch::ApplyPatchHandler; pub use artifacts::ArtifactsHandler; +pub use code_mode::CodeModeHandler; use codex_protocol::models::PermissionProfile; use codex_protocol::protocol::AskForApproval; pub use dynamic::DynamicToolHandler; diff --git a/codex-rs/core/src/tools/mod.rs b/codex-rs/core/src/tools/mod.rs index 27fe51512..677e9d5f9 100644 --- a/codex-rs/core/src/tools/mod.rs +++ b/codex-rs/core/src/tools/mod.rs @@ -1,3 +1,4 @@ +pub mod code_mode; pub mod context; pub mod events; pub(crate) mod handlers; diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index 219484c44..42a7558c6 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -67,6 +67,7 @@ pub(crate) struct ToolsConfig { pub search_tool: bool, pub request_permission_enabled: bool, pub request_permissions_tool_enabled: bool, + pub code_mode_enabled: bool, pub js_repl_enabled: bool, pub js_repl_tools_only: bool, pub collab_tools: bool, @@ -94,6 +95,7 @@ impl ToolsConfig { session_source, } = params; let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform); + let include_code_mode = features.enabled(Feature::CodeMode); let include_js_repl = features.enabled(Feature::JsRepl); let include_js_repl_tools_only = include_js_repl && features.enabled(Feature::JsReplToolsOnly); @@ -170,6 +172,7 @@ impl ToolsConfig { search_tool: include_search_tool, request_permission_enabled, request_permissions_tool_enabled, + code_mode_enabled: include_code_mode, js_repl_enabled: include_js_repl, js_repl_tools_only: include_js_repl_tools_only, collab_tools: include_collab_tools, @@ -196,6 +199,12 @@ impl ToolsConfig { self.web_search_config = web_search_config; self } + + pub fn for_code_mode_nested_tools(&self) -> Self { + let mut nested = self.clone(); + nested.code_mode_enabled = false; + nested + } } fn supports_image_generation(model_info: &ModelInfo) -> bool { @@ -1523,6 +1532,32 @@ fn create_js_repl_reset_tool() -> ToolSpec { }) } +fn create_code_mode_tool(enabled_tool_names: &[String]) -> ToolSpec { + const CODE_MODE_FREEFORM_GRAMMAR: &str = r#" +start: source +source: /[\s\S]+/ +"#; + + let enabled_list = if enabled_tool_names.is_empty() { + "none".to_string() + } else { + enabled_tool_names.join(", ") + }; + let description = format!( + "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, call nested tools with `await tools[name](args)` or identifier wrappers like `await shell(args)` when the tool name is a valid JS identifier. Nested tool calls resolve to arrays of content items. Function tools require JSON object arguments. Freeform tools require raw strings. Use synchronous `add_content(value)` with a content item or content-item array, including `add_content(await exec_command(...))`, to return the same content items a direct tool call would expose to the model. Only content passed to `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}." + ); + + ToolSpec::Freeform(FreeformTool { + name: "code_mode".to_string(), + description, + format: FreeformToolFormat { + r#type: "grammar".to_string(), + syntax: "lark".to_string(), + definition: CODE_MODE_FREEFORM_GRAMMAR.to_string(), + }, + }) +} + fn create_list_mcp_resources_tool() -> ToolSpec { let properties = BTreeMap::from([ ( @@ -1829,6 +1864,7 @@ pub(crate) fn build_specs( ) -> ToolRegistryBuilder { use crate::tools::handlers::ApplyPatchHandler; use crate::tools::handlers::ArtifactsHandler; + use crate::tools::handlers::CodeModeHandler; use crate::tools::handlers::DynamicToolHandler; use crate::tools::handlers::GrepFilesHandler; use crate::tools::handlers::JsReplHandler; @@ -1865,11 +1901,32 @@ pub(crate) fn build_specs( default_mode_request_user_input: config.default_mode_request_user_input, }); let search_tool_handler = Arc::new(SearchToolBm25Handler); + let code_mode_handler = Arc::new(CodeModeHandler); let js_repl_handler = Arc::new(JsReplHandler); let js_repl_reset_handler = Arc::new(JsReplResetHandler); let artifacts_handler = Arc::new(ArtifactsHandler); let request_permission_enabled = config.request_permission_enabled; + if config.code_mode_enabled { + let nested_config = config.for_code_mode_nested_tools(); + let (nested_specs, _) = build_specs( + &nested_config, + mcp_tools.clone(), + app_tools.clone(), + dynamic_tools, + ) + .build(); + let mut enabled_tool_names = nested_specs + .into_iter() + .map(|spec| spec.spec.name().to_string()) + .filter(|name| name != "code_mode") + .collect::>(); + enabled_tool_names.sort(); + enabled_tool_names.dedup(); + builder.push_spec(create_code_mode_tool(&enabled_tool_names)); + builder.register_handler("code_mode", code_mode_handler); + } + match &config.shell_type { ConfigShellToolType::Default => { builder.push_spec_with_parallel_support( diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs new file mode 100644 index 000000000..778ba137d --- /dev/null +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -0,0 +1,132 @@ +#![allow(clippy::expect_used, clippy::unwrap_used)] + +use anyhow::Result; +use codex_core::features::Feature; +use core_test_support::responses; +use core_test_support::responses::ResponseMock; +use core_test_support::responses::ResponsesRequest; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_custom_tool_call; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::sse; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::TestCodex; +use core_test_support::test_codex::test_codex; +use pretty_assertions::assert_eq; +use regex_lite::Regex; +use std::fs; +use wiremock::MockServer; + +fn custom_tool_output_text_and_success( + req: &ResponsesRequest, + call_id: &str, +) -> (String, Option) { + let (output, success) = req + .custom_tool_call_output_content_and_success(call_id) + .expect("custom tool output should be present"); + (output.unwrap_or_default(), success) +} + +async fn run_code_mode_turn( + server: &MockServer, + prompt: &str, + code: &str, + include_apply_patch: bool, +) -> Result<(TestCodex, ResponseMock)> { + let mut builder = test_codex().with_config(move |config| { + let _ = config.features.enable(Feature::CodeMode); + config.include_apply_patch_tool = include_apply_patch; + }); + let test = builder.build(server).await?; + + responses::mount_sse_once( + server, + sse(vec![ + ev_response_created("resp-1"), + ev_custom_tool_call("call-1", "code_mode", code), + ev_completed("resp-1"), + ]), + ) + .await; + + let second_mock = responses::mount_sse_once( + server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + test.submit_turn(prompt).await?; + Ok((test, second_mock)) +} + +#[cfg_attr(windows, ignore = "no exec_command on Windows")] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_return_exec_command_output() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let (_test, second_mock) = run_code_mode_turn( + &server, + "use code_mode to run exec_command", + r#" +add_content(await exec_command({ cmd: "printf code_mode_exec_marker" })); +"#, + false, + ) + .await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode call failed unexpectedly: {output}" + ); + let regex = Regex::new( + r#"(?ms)^Chunk ID: [[:xdigit:]]+ +Wall time: [0-9]+(?:\.[0-9]+)? seconds +Process exited with code 0 +Original token count: [0-9]+ +Output: +code_mode_exec_marker +?$"#, + )?; + assert!( + regex.is_match(&output), + "expected exec_command output envelope to match regex, got: {output}" + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let file_name = "code_mode_apply_patch.txt"; + let patch = format!( + "*** Begin Patch\n*** Add File: {file_name}\n+hello from code_mode\n*** End Patch\n" + ); + let code = format!("const items = await apply_patch({patch:?});\nadd_content(items);\n"); + + let (test, second_mock) = + run_code_mode_turn(&server, "use code_mode to run apply_patch", &code, true).await?; + + let req = second_mock.single_request(); + let (output, success) = custom_tool_output_text_and_success(&req, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode apply_patch call failed unexpectedly: {output}" + ); + + let file_path = test.cwd_path().join(file_name); + assert_eq!(fs::read_to_string(&file_path)?, "hello from code_mode\n"); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 61dfbe03e..0695fcb19 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -65,6 +65,7 @@ mod auth_refresh; mod cli_stream; mod client; mod client_websockets; +mod code_mode; mod codex_delegate; mod collaboration_instructions; mod compact;