Cleanup code_mode tool descriptions (#14480)

Move to separate files and clarify a bit.
This commit is contained in:
pakrym-oai 2026-03-12 11:13:35 -07:00 committed by GitHub
parent 774965f1e8
commit cfe3f6821a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 53 additions and 59 deletions

2
codex-rs/Cargo.lock generated
View file

@ -1439,6 +1439,7 @@ dependencies = [
"codex-utils-cargo-bin",
"codex-utils-cli",
"codex-utils-json-to-toml",
"codex-utils-pty",
"core_test_support",
"futures",
"opentelemetry",
@ -2438,7 +2439,6 @@ dependencies = [
"anyhow",
"chrono",
"clap",
"codex-otel",
"codex-protocol",
"dirs",
"log",

View file

@ -25,7 +25,6 @@ use crate::plugins::PluginCapabilitySummary;
use crate::plugins::render_plugins_section;
use crate::skills::SkillMetadata;
use crate::skills::render_skills_section;
use crate::tools::code_mode;
use codex_app_server_protocol::ConfigLayerSource;
use dunce::canonicalize as normalize_path;
use std::path::PathBuf;
@ -120,13 +119,6 @@ pub(crate) async fn get_user_instructions(
output.push_str(&plugin_section);
}
if let Some(code_mode_section) = code_mode::instructions(config) {
if !output.is_empty() {
output.push_str("\n\n");
}
output.push_str(&code_mode_section);
}
let skills_section = skills.and_then(render_skills_section);
if let Some(skills_section) = skills_section {
if !output.is_empty() {

View file

@ -0,0 +1,19 @@
## exec
- Runs raw JavaScript in an isolated context (no Node, no file system or network access, no console).
- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences.
- You have a set of tools provided to you. They are imported either from `tools.js` or `/mcp/server.js`.
- Tool methods take either string or object as parameter.
- They return either a structured value or a string based on the description above.
- Surface text back to the model with `output_text(v: string | number | boolean | undefined | null)`. A string representation of the value is returned to the model. Manually serialize complex values.
- Methods available in `@openai/code_mode` module:
- `output_text(value: string | number | boolean | undefined | null)`: A string representation of the value is returned to the model. Manually serialize complex values.
- `output_image(imageUrl: string)`: An image is returned to the model. `imageUrl` can be an HTTPS URL or a base64-encoded `data:` URL.
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
- `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
- `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running.
- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running.

View file

@ -14,8 +14,6 @@ use serde_json::Value as JsonValue;
use crate::client_common::tools::ToolSpec;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::config::Config;
use crate::features::Feature;
use crate::tools::ToolRouter;
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
use crate::tools::code_mode_description::code_mode_tool_reference;
@ -32,6 +30,8 @@ use crate::unified_exec::resolve_max_tokens;
const CODE_MODE_RUNNER_SOURCE: &str = include_str!("runner.cjs");
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
@ -60,38 +60,20 @@ enum CodeModeExecutionStatus {
Terminated,
}
pub(crate) fn instructions(config: &Config) -> Option<String> {
if !config.features.enabled(Feature::CodeMode) {
return None;
}
/// Builds the description text for the code-mode tool.
///
/// The result is `CODE_MODE_DESCRIPTION_TEMPLATE` with trailing whitespace
/// removed, followed by one extra bullet line listing the enabled nested
/// tools.
///
/// `enabled_tool_names` is rendered as a comma-separated list; when the slice
/// is empty the literal `none` is used instead.
pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String {
    // Trim the template so the appended bullet sits directly under its last line.
    let template = CODE_MODE_DESCRIPTION_TEMPLATE.trim_end();
    let nested_tools = match enabled_tool_names {
        [] => String::from("none"),
        names => names.join(", "),
    };
    format!("{template}\n- Enabled nested tools: {nested_tools}.")
}
let mut section = String::from("## Exec\n");
section.push_str(&format!(
"- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
));
section.push_str(&format!(
"- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
));
section.push_str(&format!(
"- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
));
section.push_str(&format!(
"- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
));
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
section.push_str(&format!(
"- Import `{{ background, output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `background()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
));
section.push_str(&format!(
"- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",
));
section.push_str(
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
);
section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n");
section
.push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model.");
Some(section)
/// Returns the description text for the wait tool.
///
/// The text is `CODE_MODE_WAIT_DESCRIPTION_TEMPLATE`, returned verbatim; no
/// trimming or formatting is applied here.
pub(crate) fn wait_tool_description() -> &'static str {
CODE_MODE_WAIT_DESCRIPTION_TEMPLATE
}
async fn handle_node_message(

View file

@ -265,7 +265,7 @@ function codeModeWorkerMain() {
'set_max_output_tokens_per_exec_call',
'set_yield_time',
'store',
'background',
'yield_control',
],
function initCodeModeModule() {
this.setExport('load', load);
@ -289,7 +289,7 @@ function codeModeWorkerMain() {
return normalized;
});
this.setExport('store', store);
this.setExport('background', () => {
this.setExport('yield_control', () => {
parentPort.postMessage({ type: 'yield' });
});
},

View file

@ -0,0 +1,8 @@
- Use `exec_wait` only after `exec` returns `Script running with session ID ...`.
- `session_id` identifies the running `exec` session to resume.
- `yield_time_ms` controls how long to wait for more output before yielding again. If omitted, `exec_wait` uses its default wait timeout.
- `max_tokens` limits how much new output this wait call returns.
- `terminate: true` stops the running session instead of waiting for more output.
- `exec_wait` returns only the new output since the last yield, or the final completion or termination result for that session.
- If the session is still running, `exec_wait` may yield again with the same `session_id`.
- If the session has already finished, `exec_wait` returns the completed result and closes the session.

View file

@ -8,9 +8,10 @@ use crate::features::Features;
use crate::mcp_connection_manager::ToolInfo;
use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
use crate::original_image_detail::can_request_original_image_detail;
use crate::tools::code_mode::DEFAULT_WAIT_YIELD_TIME_MS;
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
use crate::tools::code_mode::WAIT_TOOL_NAME;
use crate::tools::code_mode::tool_description as code_mode_tool_description;
use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description;
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
use crate::tools::discoverable::DiscoverablePluginInfo;
use crate::tools::discoverable::DiscoverableTool;
@ -627,7 +628,8 @@ fn create_exec_wait_tool() -> ToolSpec {
ToolSpec::Function(ResponsesApiTool {
name: WAIT_TOOL_NAME.to_string(),
description: format!(
"Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion."
"Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion.\n{}",
code_mode_wait_tool_description().trim()
),
strict: false,
parameters: JsonSchema::Object {
@ -1877,18 +1879,9 @@ start: source
source: /[\s\S]+/
"#;
let enabled_list = if enabled_tool_names.is_empty() {
"none".to_string()
} else {
enabled_tool_names.join(", ")
};
let description = format!(
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ ALL_TOOLS }} from \"tools.js\"` to inspect the available `{{ module, name, description }}` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns, and `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument with a default of `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. The default wait timeout for `{WAIT_TOOL_NAME}` is {DEFAULT_WAIT_YIELD_TIME_MS}. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. 
Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
);
ToolSpec::Freeform(FreeformTool {
name: PUBLIC_TOOL_NAME.to_string(),
description,
description: code_mode_tool_description(enabled_tool_names),
format: FreeformToolFormat {
r#type: "grammar".to_string(),
syntax: "lark".to_string(),

View file

@ -834,7 +834,7 @@ async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_in_background()
async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_after_yield_control()
-> Result<()> {
skip_if_no_network!(Ok(()));
@ -1051,11 +1051,11 @@ async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() ->
format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
let code = format!(
r#"
import {{ background, output_text }} from "@openai/code_mode";
import {{ yield_control, output_text }} from "@openai/code_mode";
import {{ exec_command }} from "tools.js";
output_text("before yield");
background();
yield_control();
await exec_command({{ cmd: {write_file_command:?} }});
output_text("after yield");
"#