From cfe3f6821ae91f38d6d6f4e86dcbb0c3a29c123f Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Thu, 12 Mar 2026 11:13:35 -0700 Subject: [PATCH] Cleanup code_mode tool descriptions (#14480) Move to separate files and clarify a bit. --- codex-rs/Cargo.lock | 2 +- codex-rs/core/src/project_doc.rs | 8 ---- .../core/src/tools/code_mode/description.md | 19 ++++++++ codex-rs/core/src/tools/code_mode/mod.rs | 48 ++++++------------- codex-rs/core/src/tools/code_mode/runner.cjs | 4 +- .../src/tools/code_mode/wait_description.md | 8 ++++ codex-rs/core/src/tools/spec.rs | 17 ++----- codex-rs/core/tests/suite/code_mode.rs | 6 +-- 8 files changed, 53 insertions(+), 59 deletions(-) create mode 100644 codex-rs/core/src/tools/code_mode/description.md create mode 100644 codex-rs/core/src/tools/code_mode/wait_description.md diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock index 79bfc73ad..2969e7522 100644 --- a/codex-rs/Cargo.lock +++ b/codex-rs/Cargo.lock @@ -1439,6 +1439,7 @@ dependencies = [ "codex-utils-cargo-bin", "codex-utils-cli", "codex-utils-json-to-toml", + "codex-utils-pty", "core_test_support", "futures", "opentelemetry", @@ -2438,7 +2439,6 @@ dependencies = [ "anyhow", "chrono", "clap", - "codex-otel", "codex-protocol", "dirs", "log", diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs index 1dc518982..bae72a460 100644 --- a/codex-rs/core/src/project_doc.rs +++ b/codex-rs/core/src/project_doc.rs @@ -25,7 +25,6 @@ use crate::plugins::PluginCapabilitySummary; use crate::plugins::render_plugins_section; use crate::skills::SkillMetadata; use crate::skills::render_skills_section; -use crate::tools::code_mode; use codex_app_server_protocol::ConfigLayerSource; use dunce::canonicalize as normalize_path; use std::path::PathBuf; @@ -120,13 +119,6 @@ pub(crate) async fn get_user_instructions( output.push_str(&plugin_section); } - if let Some(code_mode_section) = code_mode::instructions(config) { - if !output.is_empty() { - output.push_str("\n\n"); 
- } - output.push_str(&code_mode_section); - } - let skills_section = skills.and_then(render_skills_section); if let Some(skills_section) = skills_section { if !output.is_empty() { diff --git a/codex-rs/core/src/tools/code_mode/description.md b/codex-rs/core/src/tools/code_mode/description.md new file mode 100644 index 000000000..b494ef52b --- /dev/null +++ b/codex-rs/core/src/tools/code_mode/description.md @@ -0,0 +1,19 @@ +## exec +- Runs raw JavaScript in an isolated context (no Node, no file system or network access, no console). +- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences. +- You have a set of tools provided to you. They are imported either from `tools.js` or `/mcp/server.js`. +- Tool methods take either a string or an object as a parameter. +- They return either a structured value or a string based on the description above. + +- Surface text back to the model with `output_text(v: string | number | boolean | undefined | null)`. A string representation of the value is returned to the model. Manually serialize complex values. + +- Methods available in `@openai/code_mode` module: +- `output_text(value: string | number | boolean | undefined | null)`: A string representation of the value is returned to the model. Manually serialize complex values. +- `output_image(imageUrl: string)`: An image is returned to the model. `imageUrl` can be an HTTPS URL or a base64-encoded `data:` URL. +- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session. +- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing. + +- `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens. +- `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running.
+- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running. + diff --git a/codex-rs/core/src/tools/code_mode/mod.rs b/codex-rs/core/src/tools/code_mode/mod.rs index 1b51cfc2f..f6561c518 100644 --- a/codex-rs/core/src/tools/code_mode/mod.rs +++ b/codex-rs/core/src/tools/code_mode/mod.rs @@ -14,8 +14,6 @@ use serde_json::Value as JsonValue; use crate::client_common::tools::ToolSpec; use crate::codex::Session; use crate::codex::TurnContext; -use crate::config::Config; -use crate::features::Feature; use crate::tools::ToolRouter; use crate::tools::code_mode_description::augment_tool_spec_for_code_mode; use crate::tools::code_mode_description::code_mode_tool_reference; @@ -32,6 +30,8 @@ use crate::unified_exec::resolve_max_tokens; const CODE_MODE_RUNNER_SOURCE: &str = include_str!("runner.cjs"); const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js"); +const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md"); +const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md"); pub(crate) const PUBLIC_TOOL_NAME: &str = "exec"; pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait"; @@ -60,38 +60,20 @@ enum CodeModeExecutionStatus { Terminated, } -pub(crate) fn instructions(config: &Config) -> Option<String> { - if !config.features.enabled(Feature::CodeMode) { - return None; - } +pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String { + let enabled_list = if enabled_tool_names.is_empty() { + "none".to_string() + } else { + enabled_tool_names.join(", ") + }; + format!( + "{}\n- Enabled nested tools: {enabled_list}.", + CODE_MODE_DESCRIPTION_TEMPLATE.trim_end() + ) +} - let mut section = String::from("## Exec\n"); - section.push_str(&format!( - "- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n", - )); - section.push_str(&format!( - "- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool.
Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n", - )); - section.push_str(&format!( - "- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n", - )); - section.push_str(&format!( - "- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n", - )); - section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/.js`; MCP tools use `tools/mcp/.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n"); - section.push_str(&format!( - "- Import `{{ background, output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. 
`background()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n", - )); - section.push_str(&format!( - "- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n", - )); - section.push_str( - "- Function tools require JSON object arguments. Freeform tools require raw strings.\n", - ); - section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n"); - section - .push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model."); - Some(section) +pub(crate) fn wait_tool_description() -> &'static str { + CODE_MODE_WAIT_DESCRIPTION_TEMPLATE } async fn handle_node_message( diff --git a/codex-rs/core/src/tools/code_mode/runner.cjs b/codex-rs/core/src/tools/code_mode/runner.cjs index 02255b917..3f6cedd53 100644 --- a/codex-rs/core/src/tools/code_mode/runner.cjs +++ b/codex-rs/core/src/tools/code_mode/runner.cjs @@ -265,7 +265,7 @@ function codeModeWorkerMain() { 'set_max_output_tokens_per_exec_call', 'set_yield_time', 'store', - 'background', + 'yield_control', ], function initCodeModeModule() { this.setExport('load', load); @@ -289,7 +289,7 @@ function codeModeWorkerMain() { return normalized; }); this.setExport('store', store); - this.setExport('background', () => { + this.setExport('yield_control', () => { parentPort.postMessage({ type: 'yield' 
}); }); }, diff --git a/codex-rs/core/src/tools/code_mode/wait_description.md b/codex-rs/core/src/tools/code_mode/wait_description.md new file mode 100644 index 000000000..77ec11295 --- /dev/null +++ b/codex-rs/core/src/tools/code_mode/wait_description.md @@ -0,0 +1,8 @@ +- Use `exec_wait` only after `exec` returns `Script running with session ID ...`. +- `session_id` identifies the running `exec` session to resume. +- `yield_time_ms` controls how long to wait for more output before yielding again. If omitted, `exec_wait` uses its default wait timeout. +- `max_tokens` limits how much new output this wait call returns. +- `terminate: true` stops the running session instead of waiting for more output. +- `exec_wait` returns only the new output since the last yield, or the final completion or termination result for that session. +- If the session is still running, `exec_wait` may yield again with the same `session_id`. +- If the session has already finished, `exec_wait` returns the completed result and closes the session. 
diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index ab41a3b36..67094e0d0 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -8,9 +8,10 @@ use crate::features::Features; use crate::mcp_connection_manager::ToolInfo; use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig; use crate::original_image_detail::can_request_original_image_detail; -use crate::tools::code_mode::DEFAULT_WAIT_YIELD_TIME_MS; use crate::tools::code_mode::PUBLIC_TOOL_NAME; use crate::tools::code_mode::WAIT_TOOL_NAME; +use crate::tools::code_mode::tool_description as code_mode_tool_description; +use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description; use crate::tools::code_mode_description::augment_tool_spec_for_code_mode; use crate::tools::discoverable::DiscoverablePluginInfo; use crate::tools::discoverable::DiscoverableTool; @@ -627,7 +628,8 @@ fn create_exec_wait_tool() -> ToolSpec { ToolSpec::Function(ResponsesApiTool { name: WAIT_TOOL_NAME.to_string(), description: format!( - "Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion." + "Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion.\n{}", + code_mode_wait_tool_description().trim() ), strict: false, parameters: JsonSchema::Object { @@ -1877,18 +1879,9 @@ start: source source: /[\s\S]+/ "#; - let enabled_list = if enabled_tool_names.is_empty() { - "none".to_string() - } else { - enabled_tool_names.join(", ") - }; - let description = format!( - "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. 
Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ ALL_TOOLS }} from \"tools.js\"` to inspect the available `{{ module, name, description }}` entries. Namespaced tools are also available from `tools/.js`; MCP tools use `tools/mcp/.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns, and `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument with a default of `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. The default wait timeout for `{WAIT_TOOL_NAME}` is {DEFAULT_WAIT_YIELD_TIME_MS}. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. 
Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}." - ); - ToolSpec::Freeform(FreeformTool { name: PUBLIC_TOOL_NAME.to_string(), - description, + description: code_mode_tool_description(enabled_tool_names), format: FreeformToolFormat { r#type: "grammar".to_string(), syntax: "lark".to_string(), diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index 976c553dc..4f17d0d6c 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -834,7 +834,7 @@ async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> { #[cfg_attr(windows, ignore = "no exec_command on Windows")] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] -async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_in_background() +async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_after_yield_control() -> Result<()> { skip_if_no_network!(Ok(())); @@ -1051,11 +1051,11 @@ async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() -> format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready"); let code = format!( r#" -import {{ background, output_text }} from "@openai/code_mode"; +import {{ yield_control, output_text }} from "@openai/code_mode"; import {{ exec_command }} from "tools.js"; output_text("before yield"); -background(); +yield_control(); await exec_command({{ cmd: {write_file_command:?} }}); output_text("after yield"); "#