Cleanup code_mode tool descriptions (#14480)
Move to separate files and clarify a bit.
This commit is contained in:
parent
774965f1e8
commit
cfe3f6821a
8 changed files with 53 additions and 59 deletions
2
codex-rs/Cargo.lock
generated
2
codex-rs/Cargo.lock
generated
|
|
@ -1439,6 +1439,7 @@ dependencies = [
|
|||
"codex-utils-cargo-bin",
|
||||
"codex-utils-cli",
|
||||
"codex-utils-json-to-toml",
|
||||
"codex-utils-pty",
|
||||
"core_test_support",
|
||||
"futures",
|
||||
"opentelemetry",
|
||||
|
|
@ -2438,7 +2439,6 @@ dependencies = [
|
|||
"anyhow",
|
||||
"chrono",
|
||||
"clap",
|
||||
"codex-otel",
|
||||
"codex-protocol",
|
||||
"dirs",
|
||||
"log",
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@ use crate::plugins::PluginCapabilitySummary;
|
|||
use crate::plugins::render_plugins_section;
|
||||
use crate::skills::SkillMetadata;
|
||||
use crate::skills::render_skills_section;
|
||||
use crate::tools::code_mode;
|
||||
use codex_app_server_protocol::ConfigLayerSource;
|
||||
use dunce::canonicalize as normalize_path;
|
||||
use std::path::PathBuf;
|
||||
|
|
@ -120,13 +119,6 @@ pub(crate) async fn get_user_instructions(
|
|||
output.push_str(&plugin_section);
|
||||
}
|
||||
|
||||
if let Some(code_mode_section) = code_mode::instructions(config) {
|
||||
if !output.is_empty() {
|
||||
output.push_str("\n\n");
|
||||
}
|
||||
output.push_str(&code_mode_section);
|
||||
}
|
||||
|
||||
let skills_section = skills.and_then(render_skills_section);
|
||||
if let Some(skills_section) = skills_section {
|
||||
if !output.is_empty() {
|
||||
|
|
|
|||
19
codex-rs/core/src/tools/code_mode/description.md
Normal file
19
codex-rs/core/src/tools/code_mode/description.md
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
## exec
|
||||
- Runs raw JavaScript in an isolated context (no Node, no file system or network access, no console).
|
||||
- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences.
|
||||
- You have a set of tools provided to you. They are imported from either `tools.js` or `/mcp/server.js`.
|
||||
- Tool methods take either string or object as parameter.
|
||||
- They return either a structured value or a string based on the description above.
|
||||
|
||||
- Surface text back to the model with `output_text(v: string | number | boolean | undefined | null)`. A string representation of the value is returned to the model. Manually serialize complex values.
|
||||
|
||||
- Methods available in `@openai/code_mode` module:
|
||||
- `output_text(value: string | number | boolean | undefined | null)`: A string representation of the value is returned to the model. Manually serialize complex values.
|
||||
- `output_image(imageUrl: string)`: An image is returned to the model. `imageUrl` can be an HTTPS URL or a base64-encoded `data:` URL.
|
||||
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
|
||||
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
|
||||
|
||||
- `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
|
||||
- `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running.
|
||||
- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running.
|
||||
|
||||
|
|
@ -14,8 +14,6 @@ use serde_json::Value as JsonValue;
|
|||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::config::Config;
|
||||
use crate::features::Feature;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
|
||||
use crate::tools::code_mode_description::code_mode_tool_reference;
|
||||
|
|
@ -32,6 +30,8 @@ use crate::unified_exec::resolve_max_tokens;
|
|||
|
||||
const CODE_MODE_RUNNER_SOURCE: &str = include_str!("runner.cjs");
|
||||
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
|
||||
const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
|
||||
const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");
|
||||
|
||||
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
|
||||
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
|
||||
|
|
@ -60,38 +60,20 @@ enum CodeModeExecutionStatus {
|
|||
Terminated,
|
||||
}
|
||||
|
||||
pub(crate) fn instructions(config: &Config) -> Option<String> {
|
||||
if !config.features.enabled(Feature::CodeMode) {
|
||||
return None;
|
||||
}
|
||||
pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String {
|
||||
let enabled_list = if enabled_tool_names.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
enabled_tool_names.join(", ")
|
||||
};
|
||||
format!(
|
||||
"{}\n- Enabled nested tools: {enabled_list}.",
|
||||
CODE_MODE_DESCRIPTION_TEMPLATE.trim_end()
|
||||
)
|
||||
}
|
||||
|
||||
let mut section = String::from("## Exec\n");
|
||||
section.push_str(&format!(
|
||||
"- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
|
||||
));
|
||||
section.push_str(&format!(
|
||||
"- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
|
||||
));
|
||||
section.push_str(&format!(
|
||||
"- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
|
||||
));
|
||||
section.push_str(&format!(
|
||||
"- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
|
||||
));
|
||||
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { ALL_TOOLS } from \"tools.js\"` to inspect the available `{ module, name, description }` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values.\n");
|
||||
section.push_str(&format!(
|
||||
"- Import `{{ background, output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns; `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument instead and defaults to `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. `background()` returns a yielded `{PUBLIC_TOOL_NAME}` response immediately while the script keeps running in the background. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker.\n",
|
||||
));
|
||||
section.push_str(&format!(
|
||||
"- If `{PUBLIC_TOOL_NAME}` returns `Script running with session ID …`, call `{WAIT_TOOL_NAME}` with that `session_id` to keep waiting for more output, completion, or termination.\n",
|
||||
));
|
||||
section.push_str(
|
||||
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
|
||||
);
|
||||
section.push_str("- `add_content(value)` remains available for compatibility. It is synchronous and accepts a content item, an array of content items, or a string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`.\n");
|
||||
section
|
||||
.push_str("- Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model.");
|
||||
Some(section)
|
||||
pub(crate) fn wait_tool_description() -> &'static str {
|
||||
CODE_MODE_WAIT_DESCRIPTION_TEMPLATE
|
||||
}
|
||||
|
||||
async fn handle_node_message(
|
||||
|
|
|
|||
|
|
@ -265,7 +265,7 @@ function codeModeWorkerMain() {
|
|||
'set_max_output_tokens_per_exec_call',
|
||||
'set_yield_time',
|
||||
'store',
|
||||
'background',
|
||||
'yield_control',
|
||||
],
|
||||
function initCodeModeModule() {
|
||||
this.setExport('load', load);
|
||||
|
|
@ -289,7 +289,7 @@ function codeModeWorkerMain() {
|
|||
return normalized;
|
||||
});
|
||||
this.setExport('store', store);
|
||||
this.setExport('background', () => {
|
||||
this.setExport('yield_control', () => {
|
||||
parentPort.postMessage({ type: 'yield' });
|
||||
});
|
||||
},
|
||||
|
|
|
|||
8
codex-rs/core/src/tools/code_mode/wait_description.md
Normal file
8
codex-rs/core/src/tools/code_mode/wait_description.md
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
- Use `exec_wait` only after `exec` returns `Script running with session ID ...`.
|
||||
- `session_id` identifies the running `exec` session to resume.
|
||||
- `yield_time_ms` controls how long to wait for more output before yielding again. If omitted, `exec_wait` uses its default wait timeout.
|
||||
- `max_tokens` limits how much new output this wait call returns.
|
||||
- `terminate: true` stops the running session instead of waiting for more output.
|
||||
- `exec_wait` returns only the new output since the last yield, or the final completion or termination result for that session.
|
||||
- If the session is still running, `exec_wait` may yield again with the same `session_id`.
|
||||
- If the session has already finished, `exec_wait` returns the completed result and closes the session.
|
||||
|
|
@ -8,9 +8,10 @@ use crate::features::Features;
|
|||
use crate::mcp_connection_manager::ToolInfo;
|
||||
use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
|
||||
use crate::original_image_detail::can_request_original_image_detail;
|
||||
use crate::tools::code_mode::DEFAULT_WAIT_YIELD_TIME_MS;
|
||||
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
|
||||
use crate::tools::code_mode::WAIT_TOOL_NAME;
|
||||
use crate::tools::code_mode::tool_description as code_mode_tool_description;
|
||||
use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description;
|
||||
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
|
||||
use crate::tools::discoverable::DiscoverablePluginInfo;
|
||||
use crate::tools::discoverable::DiscoverableTool;
|
||||
|
|
@ -627,7 +628,8 @@ fn create_exec_wait_tool() -> ToolSpec {
|
|||
ToolSpec::Function(ResponsesApiTool {
|
||||
name: WAIT_TOOL_NAME.to_string(),
|
||||
description: format!(
|
||||
"Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion."
|
||||
"Waits on a yielded `{PUBLIC_TOOL_NAME}` session and returns new output or completion.\n{}",
|
||||
code_mode_wait_tool_description().trim()
|
||||
),
|
||||
strict: false,
|
||||
parameters: JsonSchema::Object {
|
||||
|
|
@ -1877,18 +1879,9 @@ start: source
|
|||
source: /[\s\S]+/
|
||||
"#;
|
||||
|
||||
let enabled_list = if enabled_tool_names.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
enabled_tool_names.join(", ")
|
||||
};
|
||||
let description = format!(
|
||||
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ ALL_TOOLS }} from \"tools.js\"` to inspect the available `{{ module, name, description }}` entries. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, set_yield_time, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate direct `{PUBLIC_TOOL_NAME}` returns, and `{WAIT_TOOL_NAME}` uses its own `max_tokens` argument with a default of `10000`. `set_yield_time(value)` asks `{PUBLIC_TOOL_NAME}` to return early if the script is still running after that many milliseconds so `{WAIT_TOOL_NAME}` can resume it later. The default wait timeout for `{WAIT_TOOL_NAME}` is {DEFAULT_WAIT_YIELD_TIME_MS}. The returned content starts with a separate `Script completed`, `Script failed`, or `Script running with session ID …` text item that includes wall time. When truncation happens, the final text may include `Total output lines:` and the usual `…N tokens truncated…` marker. 
Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
|
||||
);
|
||||
|
||||
ToolSpec::Freeform(FreeformTool {
|
||||
name: PUBLIC_TOOL_NAME.to_string(),
|
||||
description,
|
||||
description: code_mode_tool_description(enabled_tool_names),
|
||||
format: FreeformToolFormat {
|
||||
r#type: "grammar".to_string(),
|
||||
syntax: "lark".to_string(),
|
||||
|
|
|
|||
|
|
@ -834,7 +834,7 @@ async fn code_mode_exec_wait_returns_error_for_unknown_session() -> Result<()> {
|
|||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_in_background()
|
||||
async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_after_yield_control()
|
||||
-> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
|
|
@ -1051,11 +1051,11 @@ async fn code_mode_background_keeps_running_on_later_turn_without_exec_wait() ->
|
|||
format!("while [ ! -f {resumed_file_quoted} ]; do sleep 0.01; done; printf ready");
|
||||
let code = format!(
|
||||
r#"
|
||||
import {{ background, output_text }} from "@openai/code_mode";
|
||||
import {{ yield_control, output_text }} from "@openai/code_mode";
|
||||
import {{ exec_command }} from "tools.js";
|
||||
|
||||
output_text("before yield");
|
||||
background();
|
||||
yield_control();
|
||||
await exec_command({{ cmd: {write_file_command:?} }});
|
||||
output_text("after yield");
|
||||
"#
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue