Cleanup code_mode tool descriptions (#14480)

Move to separate files and clarify a bit.
2026-03-12 11:13:35 -07:00 · 2026-03-12 11:13:35 -07:00 · cfe3f6821a
commit cfe3f6821a
parent 774965f1e8
8 changed files with 53 additions and 59 deletions
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@ -1439,6 +1439,7 @@ dependencies = [
 "codex-utils-cargo-bin",
 "codex-utils-cli",
 "codex-utils-json-to-toml",
+ "codex-utils-pty",
 "core_test_support",
 "futures",
 "opentelemetry",
@ -2438,7 +2439,6 @@ dependencies = [
 "anyhow",
 "chrono",
 "clap",
- "codex-otel",
 "codex-protocol",
 "dirs",
 "log",
--- a/codex-rs/core/src/project_doc.rs
+++ b/codex-rs/core/src/project_doc.rs
@ -25,7 +25,6 @@ use crate::plugins::PluginCapabilitySummary;
 use crate::plugins::render_plugins_section;
 use crate::skills::SkillMetadata;
 use crate::skills::render_skills_section;
-use crate::tools::code_mode;
 use codex_app_server_protocol::ConfigLayerSource;
 use dunce::canonicalize as normalize_path;
 use std::path::PathBuf;
@ -120,13 +119,6 @@ pub(crate) async fn get_user_instructions(
        output.push_str(&plugin_section);
    }

-    if let Some(code_mode_section) = code_mode::instructions(config) {
-        if !output.is_empty() {
-            output.push_str("\n\n");
-        }
-        output.push_str(&code_mode_section);
-    }
-
    let skills_section = skills.and_then(render_skills_section);
    if let Some(skills_section) = skills_section {
        if !output.is_empty() {
--- a/codex-rs/core/src/tools/code_mode/description.md
+++ b/codex-rs/core/src/tools/code_mode/description.md
@ -0,0 +1,19 @@
+## exec
+- Runs raw JavaScript in an isolated context (no Node, no file system or network access, and no console).
+- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences.
+- You have a set of tools provided to you. They are imported either from `tools.js` or `/mcp/server.js`.
+- Tool methods take either string or object as parameter.
+- They return either a structured value or a string based on the description above.
+
+- Surface text back to the model with `output_text(v: string | number | boolean | undefined | null)`. A string representation of the value is returned to the model. Manually serialize complex values.
+
+- Methods available in `@openai/code_mode` module:
+- `output_text(value: string | number | boolean | undefined | null)`: A string representation of the value is returned to the model. Manually serialize complex values.
+- `output_image(imageUrl: string)`: An image is returned to the model. `imageUrl` can be an HTTPS URL or a base64-encoded `data:` URL.
+- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
+- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
+
+- `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
+- `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running.
+- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running.
+
--- a/codex-rs/core/src/tools/code_mode/mod.rs
+++ b/codex-rs/core/src/tools/code_mode/mod.rs
@ -14,8 +14,6 @@ use serde_json::Value as JsonValue;
 use crate::client_common::tools::ToolSpec;
 use crate::codex::Session;
 use crate::codex::TurnContext;
-use crate::config::Config;
-use crate::features::Feature;
 use crate::tools::ToolRouter;
 use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
 use crate::tools::code_mode_description::code_mode_tool_reference;
@ -32,6 +30,8 @@ use crate::unified_exec::resolve_max_tokens;

 const CODE_MODE_RUNNER_SOURCE: &str = include_str!("runner.cjs");
 const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
+const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
+const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");

 pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
 pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
@ -60,38 +60,20 @@ enum CodeModeExecutionStatus {
    Terminated,
 }

-pub(crate) fn instructions(config: &Config) -> Option<String> {
-    if !config.features.enabled(Feature::CodeMode) {
-        return None;
-    }
+pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String {
+    let enabled_list = if enabled_tool_names.is_empty() {
+        "none".to_string()
+    } else {
+        enabled_tool_names.join(", ")
+    };
+    format!(
+        "{}\n- Enabled nested tools: {enabled_list}.",
+        CODE_MODE_DESCRIPTION_TEMPLATE.trim_end()
+    )
+}

-    let mut section = String::from("## Exec\n");
-    section.push_str(&format!(
-        "- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
-    ));
-    section.push_str(&format!(
-        "- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
-    ));
-    section.push_str(&format!(
-        "- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
-    ));
-    section.push_str(&format!(
-        "- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
-    ));
-    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
-    section.push_str(&format!(
-        "- Import `{{ background, output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `background()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
-    ));
-    section.push_str(&format!(
-        "- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",
-    ));
-    section.push_str(
-        "- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
-    );
-    section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n");
-    section
-        .push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model.");
-    Some(section)
+pub(crate) fn wait_tool_description() -> &'static str {
+    CODE_MODE_WAIT_DESCRIPTION_TEMPLATE
 }

 async fn handle_node_message(
--- a/codex-rs/core/src/tools/code_mode/runner.cjs
+++ b/codex-rs/core/src/tools/code_mode/runner.cjs
@ -265,7 +265,7 @@ function codeModeWorkerMain() {
        'set_max_output_tokens_per_exec_call',
        'set_yield_time',
        'store',
-        'background',
+        'yield_control',
      ],
      function initCodeModeModule() {
        this.setExport('load', load);
@ -289,7 +289,7 @@ function codeModeWorkerMain() {
          return normalized;
        });
        this.setExport('store', store);
-        this.setExport('background', () => {
+        this.setExport('yield_control', () => {
          parentPort.postMessage({ type: 'yield' });
        });
      },
--- a/codex-rs/core/src/tools/code_mode/wait_description.md
+++ b/codex-rs/core/src/tools/code_mode/wait_description.md
@ -0,0 +1,8 @@
+- Use `exec_wait` only after `exec` returns `Script running with session ID ...`.
+- `session_id` identifies the running `exec` session to resume.
+- `yield_time_ms` controls how long to wait for more output before yielding again. If omitted, `exec_wait` uses its default wait timeout.
+- `max_tokens` limits how much new output this wait call returns.
+- `terminate: true` stops the running session instead of waiting for more output.
+- `exec_wait` returns only the new output since the last yield, or the final completion or termination result for that session.
+- If the session is still running, `exec_wait` may yield again with the same `session_id`.
+- If the session has already finished, `exec_wait` returns the completed result and closes the session.
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@ -8,9 +8,10 @@ use crate::features::Features;
 use crate::mcp_connection_manager::ToolInfo;
 use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
 use crate::original_image_detail::can_request_original_image_detail;
-use crate::tools::code_mode::DEFAULT_WAIT_YIELD_TIME_MS;
 use crate::tools::code_mode::PUBLIC_TOOL_NAME;
 use crate::tools::code_mode::WAIT_TOOL_NAME;
+use crate::tools::code_mode::tool_description as code_mode_tool_description;
+use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description;
 use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
 use crate::tools::discoverable::DiscoverablePluginInfo;
 use crate::tools::discoverable::DiscoverableTool;
@ -627,7 +628,8 @@ fn create_exec_wait_tool() -> ToolSpec {
    ToolSpec::Function(ResponsesApiTool {
        name: WAIT_TOOL_NAME.to_string(),
        description: format!(
-            "Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion."
+            "Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion.\n{}",
+            code_mode_wait_tool_description().trim()
        ),
        strict: false,
        parameters: JsonSchema::Object {
@ -1877,18 +1879,9 @@ start: source
 source: /[\s\S]+/
 "#;

-    let enabled_list = if enabled_tool_names.is_empty() {
-        "none".to_string()
-    } else {
-        enabled_tool_names.join(", ")
-    };
-    let description = format!(
-        "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ ALL_TOOLS }} from \"tools.js\"` to inspect the available `{{ module, name, description }}` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns, and `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument with a default of `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. The default wait timeout for `{WAIT_TOOL_NAME}` is {DEFAULT_WAIT_YIELD_TIME_MS}. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. 
Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
-    );
-
    ToolSpec::Freeform(FreeformTool {
        name: PUBLIC_TOOL_NAME.to_string(),
-        description,
+        description: code_mode_tool_description(enabled_tool_names),
        format: FreeformToolFormat {
            r#type: "grammar".to_string(),
            syntax: "lark".to_string(),
--- a/codex-rs/core/tests/suite/code_mode.rs
+++ b/codex-rs/core/tests/suite/code_mode.rs
@ -834,7 +834,7 @@ async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {

 #[cfg_attr(windows, ignore = "no exec_command on Windows")]
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_in_background()
+async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_after_yield_control()
 -> Result<()> {
    skip_if_no_network!(Ok(()));

@ -1051,11 +1051,11 @@ async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() ->
        format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
    let code = format!(
        r#"
-import {{ background, output_text }} from "@openai/code_mode";
+import {{ yield_control, output_text }} from "@openai/code_mode";
 import {{ exec_command }} from "tools.js";

 output_text("before yield");
-background();
+yield_control();
 await exec_command({{ cmd: {write_file_command:?} }});
 output_text("after yield");
 "#