From cfe3f6821ae91f38d6d6f4e86dcbb0c3a29c123f Mon Sep 17 00:00:00 2001
From: pakrym-oai <pakrym@openai.com>
Date: Thu, 12 Mar 2026 11:13:35 -0700
Subject: [PATCH] Cleanup code_mode tool descriptions (#14480)

Move to separate files and clarify a bit.
---
 codex-rs/Cargo.lock                           |  2 +-
 codex-rs/core/src/project_doc.rs              |  8 ----
 .../core/src/tools/code_mode/description.md   | 19 ++++++++
 codex-rs/core/src/tools/code_mode/mod.rs      | 48 ++++++-------------
 codex-rs/core/src/tools/code_mode/runner.cjs  |  4 +-
 .../src/tools/code_mode/wait_description.md   |  8 ++++
 codex-rs/core/src/tools/spec.rs               | 17 ++-----
 codex-rs/core/tests/suite/code_mode.rs        |  6 +--
 8 files changed, 53 insertions(+), 59 deletions(-)
 create mode 100644 codex-rs/core/src/tools/code_mode/description.md
 create mode 100644 codex-rs/core/src/tools/code_mode/wait_description.md

diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index 79bfc73ad..2969e7522 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -1439,6 +1439,7 @@ dependencies = [
  "codex-utils-cargo-bin",
  "codex-utils-cli",
  "codex-utils-json-to-toml",
+ "codex-utils-pty",
  "core_test_support",
  "futures",
  "opentelemetry",
@@ -2438,7 +2439,6 @@ dependencies = [
  "anyhow",
  "chrono",
  "clap",
- "codex-otel",
  "codex-protocol",
  "dirs",
  "log",
diff --git a/codex-rs/core/src/project_doc.rs b/codex-rs/core/src/project_doc.rs
index 1dc518982..bae72a460 100644
--- a/codex-rs/core/src/project_doc.rs
+++ b/codex-rs/core/src/project_doc.rs
@@ -25,7 +25,6 @@ use crate::plugins::PluginCapabilitySummary;
 use crate::plugins::render_plugins_section;
 use crate::skills::SkillMetadata;
 use crate::skills::render_skills_section;
-use crate::tools::code_mode;
 use codex_app_server_protocol::ConfigLayerSource;
 use dunce::canonicalize as normalize_path;
 use std::path::PathBuf;
@@ -120,13 +119,6 @@ pub(crate) async fn get_user_instructions(
         output.push_str(&plugin_section);
     }
 
-    if let Some(code_mode_section) = code_mode::instructions(config) {
-        if !output.is_empty() {
-            output.push_str("\n\n");
-        }
-        output.push_str(&code_mode_section);
-    }
-
     let skills_section = skills.and_then(render_skills_section);
     if let Some(skills_section) = skills_section {
         if !output.is_empty() {
diff --git a/codex-rs/core/src/tools/code_mode/description.md b/codex-rs/core/src/tools/code_mode/description.md
new file mode 100644
index 000000000..b494ef52b
--- /dev/null
+++ b/codex-rs/core/src/tools/code_mode/description.md
@@ -0,0 +1,19 @@
+## exec
+- Runs raw JavaScript in an isolated context (no Node, no file system or network access, and no console).
+- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences.
+- You have a set of tools provided to you. They are imported either from `tools.js` or `/mcp/server.js`.
+- Tool methods take either a string or an object as a parameter.
+- They return either a structured value or a string based on the description above.
+
+- Surface text back to the model with `output_text(v: string | number | boolean | undefined | null)`. A string representation of the value is returned to the model. Manually serialize complex values.
+
+- Methods available in the `@openai/code_mode` module:
+- `output_text(value: string | number | boolean | undefined | null)`: A string representation of the value is returned to the model. Manually serialize complex values.
+- `output_image(imageUrl: string)`: An image is returned to the model. `imageUrl` can be an HTTPS URL or a base64-encoded `data:` URL.
+- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
+- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
+
+- `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
+- `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running.
+- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running.
+
diff --git a/codex-rs/core/src/tools/code_mode/mod.rs b/codex-rs/core/src/tools/code_mode/mod.rs
index 1b51cfc2f..f6561c518 100644
--- a/codex-rs/core/src/tools/code_mode/mod.rs
+++ b/codex-rs/core/src/tools/code_mode/mod.rs
@@ -14,8 +14,6 @@ use serde_json::Value as JsonValue;
 use crate::client_common::tools::ToolSpec;
 use crate::codex::Session;
 use crate::codex::TurnContext;
-use crate::config::Config;
-use crate::features::Feature;
 use crate::tools::ToolRouter;
 use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
 use crate::tools::code_mode_description::code_mode_tool_reference;
@@ -32,6 +30,8 @@ use crate::unified_exec::resolve_max_tokens;
 
 const CODE_MODE_RUNNER_SOURCE: &str = include_str!("runner.cjs");
 const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
+const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
+const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");
 
 pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
 pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
@@ -60,38 +60,20 @@ enum CodeModeExecutionStatus {
     Terminated,
 }
 
-pub(crate) fn instructions(config: &Config) -> Option<String> {
-    if !config.features.enabled(Feature::CodeMode) {
-        return None;
-    }
+pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String {
+    let enabled_list = if enabled_tool_names.is_empty() {
+        "none".to_string()
+    } else {
+        enabled_tool_names.join(", ")
+    };
+    format!(
+        "{}\n- Enabled nested tools: {enabled_list}.",
+        CODE_MODE_DESCRIPTION_TEMPLATE.trim_end()
+    )
+}
 
-    let mut section = String::from("## Exec\n");
-    section.push_str(&format!(
-        "- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
-    ));
-    section.push_str(&format!(
-        "- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
-    ));
-    section.push_str(&format!(
-        "- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
-    ));
-    section.push_str(&format!(
-        "- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
-    ));
-    section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
-    section.push_str(&format!(
-        "- Import `{{ background, output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `background()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
-    ));
-    section.push_str(&format!(
-        "- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",
-    ));
-    section.push_str(
-        "- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
-    );
-    section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n");
-    section
-        .push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model.");
-    Some(section)
+pub(crate) fn wait_tool_description() -> &'static str {
+    CODE_MODE_WAIT_DESCRIPTION_TEMPLATE
 }
 
 async fn handle_node_message(
diff --git a/codex-rs/core/src/tools/code_mode/runner.cjs b/codex-rs/core/src/tools/code_mode/runner.cjs
index 02255b917..3f6cedd53 100644
--- a/codex-rs/core/src/tools/code_mode/runner.cjs
+++ b/codex-rs/core/src/tools/code_mode/runner.cjs
@@ -265,7 +265,7 @@ function codeModeWorkerMain() {
         'set_max_output_tokens_per_exec_call',
         'set_yield_time',
         'store',
-        'background',
+        'yield_control',
       ],
       function initCodeModeModule() {
         this.setExport('load', load);
@@ -289,7 +289,7 @@ function codeModeWorkerMain() {
           return normalized;
         });
         this.setExport('store', store);
-        this.setExport('background', () => {
+        this.setExport('yield_control', () => {
           parentPort.postMessage({ type: 'yield' });
         });
       },
diff --git a/codex-rs/core/src/tools/code_mode/wait_description.md b/codex-rs/core/src/tools/code_mode/wait_description.md
new file mode 100644
index 000000000..77ec11295
--- /dev/null
+++ b/codex-rs/core/src/tools/code_mode/wait_description.md
@@ -0,0 +1,8 @@
+- Use `exec_wait` only after `exec` returns `Script running with session ID ...`.
+- `session_id` identifies the running `exec` session to resume.
+- `yield_time_ms` controls how long to wait for more output before yielding again. If omitted, `exec_wait` uses its default wait timeout.
+- `max_tokens` limits how much new output this wait call returns.
+- `terminate: true` stops the running session instead of waiting for more output.
+- `exec_wait` returns only the new output since the last yield, or the final completion or termination result for that session.
+- If the session is still running, `exec_wait` may yield again with the same `session_id`.
+- If the session has already finished, `exec_wait` returns the completed result and closes the session.
diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs
index ab41a3b36..67094e0d0 100644
--- a/codex-rs/core/src/tools/spec.rs
+++ b/codex-rs/core/src/tools/spec.rs
@@ -8,9 +8,10 @@ use crate::features::Features;
 use crate::mcp_connection_manager::ToolInfo;
 use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
 use crate::original_image_detail::can_request_original_image_detail;
-use crate::tools::code_mode::DEFAULT_WAIT_YIELD_TIME_MS;
 use crate::tools::code_mode::PUBLIC_TOOL_NAME;
 use crate::tools::code_mode::WAIT_TOOL_NAME;
+use crate::tools::code_mode::tool_description as code_mode_tool_description;
+use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description;
 use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
 use crate::tools::discoverable::DiscoverablePluginInfo;
 use crate::tools::discoverable::DiscoverableTool;
@@ -627,7 +628,8 @@ fn create_exec_wait_tool() -> ToolSpec {
     ToolSpec::Function(ResponsesApiTool {
         name: WAIT_TOOL_NAME.to_string(),
         description: format!(
-            "Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion."
+            "Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion.\n{}",
+            code_mode_wait_tool_description().trim()
         ),
         strict: false,
         parameters: JsonSchema::Object {
@@ -1877,18 +1879,9 @@ start: source
 source: /[\s\S]+/
 "#;
 
-    let enabled_list = if enabled_tool_names.is_empty() {
-        "none".to_string()
-    } else {
-        enabled_tool_names.join(", ")
-    };
-    let description = format!(
-        "Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ ALL_TOOLS }} from \"tools.js\"` to inspect the available `{{ module, name, description }}` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns, and `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument with a default of `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. The default wait timeout for `{WAIT_TOOL_NAME}` is {DEFAULT_WAIT_YIELD_TIME_MS}. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. 
Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
-    );
-
     ToolSpec::Freeform(FreeformTool {
         name: PUBLIC_TOOL_NAME.to_string(),
-        description,
+        description: code_mode_tool_description(enabled_tool_names),
         format: FreeformToolFormat {
             r#type: "grammar".to_string(),
             syntax: "lark".to_string(),
diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs
index 976c553dc..4f17d0d6c 100644
--- a/codex-rs/core/tests/suite/code_mode.rs
+++ b/codex-rs/core/tests/suite/code_mode.rs
@@ -834,7 +834,7 @@ async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {
 
 #[cfg_attr(windows, ignore = "no exec_command on Windows")]
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_in_background()
+async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_after_yield_control()
 -> Result<()> {
     skip_if_no_network!(Ok(()));
 
@@ -1051,11 +1051,11 @@ async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() ->
         format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
     let code = format!(
         r#"
-import {{ background, output_text }} from "@openai/code_mode";
+import {{ yield_control, output_text }} from "@openai/code_mode";
 import {{ exec_command }} from "tools.js";
 
 output_text("before yield");
-background();
+yield_control();
 await exec_command({{ cmd: {write_file_command:?} }});
 output_text("after yield");
 "#