Rename code mode tool to exec (#14254)
Summary:
- Update the code-mode handler, runner, instructions, and error text to refer to the `exec` tool name everywhere that used to say `code_mode`.
- Ensure generated documentation strings and tool specs describe `exec` and rely on the shared `PUBLIC_TOOL_NAME` constant.
- Refresh the suite tests so they invoke `exec` instead of the old name.

Testing:
- Not run (not requested).
This commit is contained in:
parent
e77b2fd925
commit
8a099b3dfb
6 changed files with 82 additions and 72 deletions
|
|
@ -409,7 +409,7 @@ impl Codex {
|
|||
&& let Err(err) = resolve_compatible_node(config.js_repl_node_path.as_deref()).await
|
||||
{
|
||||
let message = format!(
|
||||
"Disabled `code_mode` for this session because the configured Node runtime is unavailable or incompatible. {err}"
|
||||
"Disabled `exec` for this session because the configured Node runtime is unavailable or incompatible. {err}"
|
||||
);
|
||||
warn!("{message}");
|
||||
let _ = config.features.disable(Feature::CodeMode);
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ use tokio::io::BufReader;
|
|||
|
||||
const CODE_MODE_RUNNER_SOURCE: &str = include_str!("code_mode_runner.cjs");
|
||||
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("code_mode_bridge.js");
|
||||
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
|
||||
|
||||
#[derive(Clone)]
|
||||
struct ExecContext {
|
||||
|
|
@ -89,15 +90,23 @@ pub(crate) fn instructions(config: &Config) -> Option<String> {
|
|||
return None;
|
||||
}
|
||||
|
||||
let mut section = String::from("## Code Mode\n");
|
||||
section.push_str(
|
||||
"- Use `code_mode` for JavaScript execution in a Node-backed `node:vm` context.\n",
|
||||
);
|
||||
section.push_str("- `code_mode` is a freeform/custom tool. Direct `code_mode` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n");
|
||||
section.push_str("- Direct tool calls remain available while `code_mode` is enabled.\n");
|
||||
section.push_str("- `code_mode` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n");
|
||||
let mut section = String::from("## Exec\n");
|
||||
section.push_str(&format!(
|
||||
"- Use `{PUBLIC_TOOL_NAME}` for JavaScript execution in a Node-backed `node:vm` context.\n",
|
||||
));
|
||||
section.push_str(&format!(
|
||||
"- `{PUBLIC_TOOL_NAME}` is a freeform/custom tool. Direct `{PUBLIC_TOOL_NAME}` calls must send raw JavaScript tool input. Do not wrap code in JSON, quotes, or markdown code fences.\n",
|
||||
));
|
||||
section.push_str(&format!(
|
||||
"- Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled.\n",
|
||||
));
|
||||
section.push_str(&format!(
|
||||
"- `{PUBLIC_TOOL_NAME}` uses the same Node runtime resolution as `js_repl`. If needed, point `js_repl_node_path` at the Node binary you want Codex to use.\n",
|
||||
));
|
||||
section.push_str("- Import nested tools from `tools.js`, for example `import { exec_command } from \"tools.js\"` or `import { tools } from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import { append_notebook_logs_chart } from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await exec_command(args)` remain available for compatibility. Nested tool calls resolve to their code-mode result values.\n");
|
||||
section.push_str("- Import `{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution; the default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n");
|
||||
section.push_str(&format!(
|
||||
"- Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `@openai/code_mode` (or `\"openai/code_mode\"`). `output_text(value)` surfaces text back to the model and stringifies non-string objects with `JSON.stringify(...)` when possible. `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs. `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, and `load(key)` returns a cloned stored value or `undefined`. `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution; the default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker.\n",
|
||||
));
|
||||
section.push_str(
|
||||
"- Function tools require JSON object arguments. Freeform tools require raw strings.\n",
|
||||
);
|
||||
|
|
@ -149,19 +158,19 @@ async fn execute_node(
|
|||
|
||||
let mut child = cmd
|
||||
.spawn()
|
||||
.map_err(|err| format!("failed to start code_mode Node runtime: {err}"))?;
|
||||
.map_err(|err| format!("failed to start {PUBLIC_TOOL_NAME} Node runtime: {err}"))?;
|
||||
let stdout = child
|
||||
.stdout
|
||||
.take()
|
||||
.ok_or_else(|| "code_mode runner missing stdout".to_string())?;
|
||||
.ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stdout"))?;
|
||||
let stderr = child
|
||||
.stderr
|
||||
.take()
|
||||
.ok_or_else(|| "code_mode runner missing stderr".to_string())?;
|
||||
.ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stderr"))?;
|
||||
let mut stdin = child
|
||||
.stdin
|
||||
.take()
|
||||
.ok_or_else(|| "code_mode runner missing stdin".to_string())?;
|
||||
.ok_or_else(|| format!("{PUBLIC_TOOL_NAME} runner missing stdin"))?;
|
||||
|
||||
let stderr_task = tokio::spawn(async move {
|
||||
let mut reader = BufReader::new(stderr);
|
||||
|
|
@ -185,13 +194,14 @@ async fn execute_node(
|
|||
while let Some(line) = stdout_lines
|
||||
.next_line()
|
||||
.await
|
||||
.map_err(|err| format!("failed to read code_mode runner stdout: {err}"))?
|
||||
.map_err(|err| format!("failed to read {PUBLIC_TOOL_NAME} runner stdout: {err}"))?
|
||||
{
|
||||
if line.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let message: NodeToHostMessage = serde_json::from_str(&line)
|
||||
.map_err(|err| format!("invalid code_mode runner message: {err}; line={line}"))?;
|
||||
let message: NodeToHostMessage = serde_json::from_str(&line).map_err(|err| {
|
||||
format!("invalid {PUBLIC_TOOL_NAME} runner message: {err}; line={line}")
|
||||
})?;
|
||||
match message {
|
||||
NodeToHostMessage::ToolCall { id, name, input } => {
|
||||
let response = HostToNodeMessage::Response {
|
||||
|
|
@ -224,20 +234,20 @@ async fn execute_node(
|
|||
let status = child
|
||||
.wait()
|
||||
.await
|
||||
.map_err(|err| format!("failed to wait for code_mode runner: {err}"))?;
|
||||
.map_err(|err| format!("failed to wait for {PUBLIC_TOOL_NAME} runner: {err}"))?;
|
||||
let stderr = stderr_task
|
||||
.await
|
||||
.map_err(|err| format!("failed to collect code_mode stderr: {err}"))?;
|
||||
.map_err(|err| format!("failed to collect {PUBLIC_TOOL_NAME} stderr: {err}"))?;
|
||||
|
||||
match final_content_items {
|
||||
Some(content_items) if status.success() => Ok(content_items),
|
||||
Some(_) => Err(format_runner_failure(
|
||||
"code_mode execution failed",
|
||||
&format!("{PUBLIC_TOOL_NAME} execution failed"),
|
||||
status,
|
||||
&stderr,
|
||||
)),
|
||||
None => Err(format_runner_failure(
|
||||
"code_mode runner exited without returning a result",
|
||||
&format!("{PUBLIC_TOOL_NAME} runner exited without returning a result"),
|
||||
status,
|
||||
&stderr,
|
||||
)),
|
||||
|
|
@ -249,19 +259,19 @@ async fn write_message(
|
|||
message: &HostToNodeMessage,
|
||||
) -> Result<(), String> {
|
||||
let line = serde_json::to_string(message)
|
||||
.map_err(|err| format!("failed to serialize code_mode message: {err}"))?;
|
||||
.map_err(|err| format!("failed to serialize {PUBLIC_TOOL_NAME} message: {err}"))?;
|
||||
stdin
|
||||
.write_all(line.as_bytes())
|
||||
.await
|
||||
.map_err(|err| format!("failed to write code_mode message: {err}"))?;
|
||||
.map_err(|err| format!("failed to write {PUBLIC_TOOL_NAME} message: {err}"))?;
|
||||
stdin
|
||||
.write_all(b"\n")
|
||||
.await
|
||||
.map_err(|err| format!("failed to write code_mode message newline: {err}"))?;
|
||||
.map_err(|err| format!("failed to write {PUBLIC_TOOL_NAME} message newline: {err}"))?;
|
||||
stdin
|
||||
.flush()
|
||||
.await
|
||||
.map_err(|err| format!("failed to flush code_mode message: {err}"))
|
||||
.map_err(|err| format!("failed to flush {PUBLIC_TOOL_NAME} message: {err}"))
|
||||
}
|
||||
|
||||
fn append_stderr(message: String, stderr: &str) -> String {
|
||||
|
|
@ -336,7 +346,7 @@ async fn build_enabled_tools(exec: &ExecContext) -> Vec<EnabledTool> {
|
|||
let mut out = Vec::new();
|
||||
for spec in router.specs() {
|
||||
let tool_name = spec.name().to_string();
|
||||
if tool_name == "code_mode" {
|
||||
if tool_name == PUBLIC_TOOL_NAME {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
@ -385,8 +395,8 @@ async fn call_nested_tool(
|
|||
tool_name: String,
|
||||
input: Option<JsonValue>,
|
||||
) -> JsonValue {
|
||||
if tool_name == "code_mode" {
|
||||
return JsonValue::String("code_mode cannot invoke itself".to_string());
|
||||
if tool_name == PUBLIC_TOOL_NAME {
|
||||
return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself"));
|
||||
}
|
||||
|
||||
let router = build_nested_router(&exec).await;
|
||||
|
|
@ -410,7 +420,7 @@ async fn call_nested_tool(
|
|||
|
||||
let call = ToolCall {
|
||||
tool_name: tool_name.clone(),
|
||||
call_id: format!("code_mode-{}", uuid::Uuid::new_v4()),
|
||||
call_id: format!("{PUBLIC_TOOL_NAME}-{}", uuid::Uuid::new_v4()),
|
||||
payload,
|
||||
};
|
||||
let result = router
|
||||
|
|
@ -442,7 +452,7 @@ fn tool_kind_for_name(specs: &[ToolSpec], tool_name: &str) -> Result<CodeModeToo
|
|||
.iter()
|
||||
.find(|spec| spec.name() == tool_name)
|
||||
.map(tool_kind_for_spec)
|
||||
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in code_mode"))
|
||||
.ok_or_else(|| format!("tool `{tool_name}` is not enabled in {PUBLIC_TOOL_NAME}"))
|
||||
}
|
||||
|
||||
fn build_nested_tool_payload(
|
||||
|
|
@ -496,8 +506,9 @@ fn output_content_items_from_json_values(
|
|||
.into_iter()
|
||||
.enumerate()
|
||||
.map(|(index, item)| {
|
||||
serde_json::from_value(item)
|
||||
.map_err(|err| format!("invalid code_mode content item at index {index}: {err}"))
|
||||
serde_json::from_value(item).map_err(|err| {
|
||||
format!("invalid {PUBLIC_TOOL_NAME} content item at index {index}: {err}")
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -317,14 +317,14 @@ function createModuleResolver(context, callTool, enabledTools, state) {
|
|||
}
|
||||
const namespacedMatch = /^tools\/(.+)\.js$/.exec(specifier);
|
||||
if (!namespacedMatch) {
|
||||
throw new Error(`Unsupported import in code_mode: ${specifier}`);
|
||||
throw new Error(`Unsupported import in exec: ${specifier}`);
|
||||
}
|
||||
|
||||
const namespace = namespacedMatch[1]
|
||||
.split('/')
|
||||
.filter((segment) => segment.length > 0);
|
||||
if (namespace.length === 0) {
|
||||
throw new Error(`Unsupported import in code_mode: ${specifier}`);
|
||||
throw new Error(`Unsupported import in exec: ${specifier}`);
|
||||
}
|
||||
|
||||
const cacheKey = namespace.join('/');
|
||||
|
|
@ -347,7 +347,7 @@ async function runModule(context, request, state, callTool) {
|
|||
);
|
||||
const mainModule = new SourceTextModule(request.source, {
|
||||
context,
|
||||
identifier: 'code_mode_main.mjs',
|
||||
identifier: 'exec_main.mjs',
|
||||
importModuleDynamically: async (specifier) => resolveModule(specifier),
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ use async_trait::async_trait;
|
|||
use crate::features::Feature;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::code_mode;
|
||||
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolPayload;
|
||||
|
|
@ -33,17 +34,17 @@ impl ToolHandler for CodeModeHandler {
|
|||
} = invocation;
|
||||
|
||||
if !session.features().enabled(Feature::CodeMode) {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"code_mode is disabled by feature flag".to_string(),
|
||||
));
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"{PUBLIC_TOOL_NAME} is disabled by feature flag"
|
||||
)));
|
||||
}
|
||||
|
||||
let code = match payload {
|
||||
ToolPayload::Custom { input } => input,
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"code_mode expects raw JavaScript source text".to_string(),
|
||||
));
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"{PUBLIC_TOOL_NAME} expects raw JavaScript source text"
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use crate::features::Feature;
|
|||
use crate::features::Features;
|
||||
use crate::mcp_connection_manager::ToolInfo;
|
||||
use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
|
||||
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
|
||||
use crate::tools::handlers::PLAN_TOOL;
|
||||
use crate::tools::handlers::SEARCH_TOOL_BM25_DEFAULT_LIMIT;
|
||||
use crate::tools::handlers::SEARCH_TOOL_BM25_TOOL_NAME;
|
||||
|
|
@ -1620,11 +1621,11 @@ source: /[\s\S]+/
|
|||
enabled_tool_names.join(", ")
|
||||
};
|
||||
let description = format!(
|
||||
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `code_mode` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `code_mode` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `code_mode` execution. The default is `10000`. This guards the overall `code_mode` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. 
Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
|
||||
"Runs JavaScript in a Node-backed `node:vm` context. This is a freeform tool: send raw JavaScript source text (no JSON/quotes/markdown fences). Direct tool calls remain available while `{PUBLIC_TOOL_NAME}` is enabled. Inside JavaScript, import nested tools from `tools.js`, for example `import {{ exec_command }} from \"tools.js\"` or `import {{ tools }} from \"tools.js\"`. Namespaced tools are also available from `tools/<namespace...>.js`; MCP tools use `tools/mcp/<server>.js`, for example `import {{ append_notebook_logs_chart }} from \"tools/mcp/ologs.js\"`. `tools[name]` and identifier wrappers like `await shell(args)` remain available for compatibility when the tool name is a valid JS identifier. Nested tool calls resolve to their code-mode result values. Import `{{ output_text, output_image, set_max_output_tokens_per_exec_call, store, load }}` from `\"@openai/code_mode\"` (or `\"openai/code_mode\"`); `output_text(value)` surfaces text back to the model and stringifies non-string objects when possible, `output_image(imageUrl)` appends an `input_image` content item for `http(s)` or `data:` URLs, `store(key, value)` persists JSON-serializable values across `{PUBLIC_TOOL_NAME}` calls in the current session, `load(key)` returns a cloned stored value or `undefined`, and `set_max_output_tokens_per_exec_call(value)` sets the token budget used to truncate the final Rust-side result of the current `{PUBLIC_TOOL_NAME}` execution. The default is `10000`. This guards the overall `{PUBLIC_TOOL_NAME}` output, not individual nested tool invocations. When truncation happens, the final text uses the unified-exec style `Original token count:` / `Output:` wrapper and the usual `…N tokens truncated…` marker. Function tools require JSON object arguments. Freeform tools require raw strings. `add_content(value)` remains available for compatibility with a content item, content-item array, or string. 
Structured nested-tool results should be converted to text first, for example with `JSON.stringify(...)`. Only content passed to `output_text(...)`, `output_image(...)`, or `add_content(value)` is surfaced back to the model. Enabled nested tools: {enabled_list}."
|
||||
);
|
||||
|
||||
ToolSpec::Freeform(FreeformTool {
|
||||
name: "code_mode".to_string(),
|
||||
name: PUBLIC_TOOL_NAME.to_string(),
|
||||
description,
|
||||
format: FreeformToolFormat {
|
||||
r#type: "grammar".to_string(),
|
||||
|
|
@ -2026,12 +2027,12 @@ pub(crate) fn build_specs(
|
|||
let mut enabled_tool_names = nested_specs
|
||||
.into_iter()
|
||||
.map(|spec| spec.spec.name().to_string())
|
||||
.filter(|name| name != "code_mode")
|
||||
.filter(|name| name != PUBLIC_TOOL_NAME)
|
||||
.collect::<Vec<_>>();
|
||||
enabled_tool_names.sort();
|
||||
enabled_tool_names.dedup();
|
||||
builder.push_spec(create_code_mode_tool(&enabled_tool_names));
|
||||
builder.register_handler("code_mode", code_mode_handler);
|
||||
builder.register_handler(PUBLIC_TOOL_NAME, code_mode_handler);
|
||||
}
|
||||
|
||||
match &config.shell_type {
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ async fn run_code_mode_turn(
|
|||
server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "code_mode", code),
|
||||
ev_custom_tool_call("call-1", "exec", code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
|
|
@ -114,7 +114,7 @@ async fn run_code_mode_turn_with_rmcp(
|
|||
server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call("call-1", "code_mode", code),
|
||||
ev_custom_tool_call("call-1", "exec", code),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
|
|
@ -141,7 +141,7 @@ async fn code_mode_can_return_exec_command_output() -> Result<()> {
|
|||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use code_mode to run exec_command",
|
||||
"use exec to run exec_command",
|
||||
r#"
|
||||
import { exec_command } from "tools.js";
|
||||
|
||||
|
|
@ -156,7 +156,7 @@ add_content(JSON.stringify(await exec_command({ cmd: "printf code_mode_exec_mark
|
|||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode call failed unexpectedly: {output}"
|
||||
"exec call failed unexpectedly: {output}"
|
||||
);
|
||||
let parsed: Value = serde_json::from_str(&output)?;
|
||||
assert!(
|
||||
|
|
@ -184,7 +184,7 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<
|
|||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use code_mode to truncate the final result",
|
||||
"use exec to truncate the final result",
|
||||
r#"
|
||||
import { exec_command } from "tools.js";
|
||||
import { set_max_output_tokens_per_exec_call } from "@openai/code_mode";
|
||||
|
|
@ -205,7 +205,7 @@ add_content(JSON.stringify(await exec_command({
|
|||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode call failed unexpectedly: {output}"
|
||||
"exec call failed unexpectedly: {output}"
|
||||
);
|
||||
let expected_pattern = r#"(?sx)
|
||||
\A
|
||||
|
|
@ -228,7 +228,7 @@ async fn code_mode_can_output_serialized_text_via_openai_code_mode_module() -> R
|
|||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use code_mode to return structured text",
|
||||
"use exec to return structured text",
|
||||
r#"
|
||||
import { output_text } from "@openai/code_mode";
|
||||
|
||||
|
|
@ -243,7 +243,7 @@ output_text({ json: true });
|
|||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode call failed unexpectedly: {output}"
|
||||
"exec call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(output, r#"{"json":true}"#);
|
||||
|
||||
|
|
@ -257,7 +257,7 @@ async fn code_mode_surfaces_output_text_stringify_errors() -> Result<()> {
|
|||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use code_mode to return circular text",
|
||||
"use exec to return circular text",
|
||||
r#"
|
||||
import { output_text } from "@openai/code_mode";
|
||||
|
||||
|
|
@ -276,7 +276,7 @@ output_text(circular);
|
|||
Some(true),
|
||||
"circular stringify unexpectedly succeeded"
|
||||
);
|
||||
assert!(output.contains("code_mode execution failed"));
|
||||
assert!(output.contains("exec execution failed"));
|
||||
assert!(output.contains("Converting circular structure to JSON"));
|
||||
|
||||
Ok(())
|
||||
|
|
@ -289,7 +289,7 @@ async fn code_mode_can_output_images_via_openai_code_mode_module() -> Result<()>
|
|||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use code_mode to return images",
|
||||
"use exec to return images",
|
||||
r#"
|
||||
import { output_image } from "@openai/code_mode";
|
||||
|
||||
|
|
@ -342,14 +342,14 @@ async fn code_mode_can_apply_patch_via_nested_tool() -> Result<()> {
|
|||
);
|
||||
|
||||
let (test, second_mock) =
|
||||
run_code_mode_turn(&server, "use code_mode to run apply_patch", &code, true).await?;
|
||||
run_code_mode_turn(&server, "use exec to run apply_patch", &code, true).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode apply_patch call failed unexpectedly: {output}"
|
||||
"exec apply_patch call failed unexpectedly: {output}"
|
||||
);
|
||||
|
||||
let file_path = test.cwd_path().join(file_name);
|
||||
|
|
@ -378,15 +378,14 @@ add_content(
|
|||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code)
|
||||
.await?;
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp echo call failed unexpectedly: {output}"
|
||||
"exec rmcp echo call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
|
|
@ -418,15 +417,14 @@ add_content(
|
|||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use code_mode to run the rmcp echo tool", code)
|
||||
.await?;
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to run the rmcp echo tool", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp echo call failed unexpectedly: {output}"
|
||||
"exec rmcp echo call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
|
|
@ -460,7 +458,7 @@ add_content(
|
|||
|
||||
let (_test, second_mock) = run_code_mode_turn_with_rmcp(
|
||||
&server,
|
||||
"use code_mode to run the rmcp image scenario tool",
|
||||
"use exec to run the rmcp image scenario tool",
|
||||
code,
|
||||
)
|
||||
.await?;
|
||||
|
|
@ -470,7 +468,7 @@ add_content(
|
|||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp image scenario call failed unexpectedly: {output}"
|
||||
"exec rmcp image scenario call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
|
|
@ -504,15 +502,14 @@ add_content(
|
|||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use code_mode to call rmcp echo badly", code)
|
||||
.await?;
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to call rmcp echo badly", code).await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_text_and_success(&req, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode rmcp error call failed unexpectedly: {output}"
|
||||
"exec rmcp error call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(
|
||||
output,
|
||||
|
|
@ -540,7 +537,7 @@ async fn code_mode_can_store_and_load_values_across_turns() -> Result<()> {
|
|||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call(
|
||||
"call-1",
|
||||
"code_mode",
|
||||
"exec",
|
||||
r#"
|
||||
import { store } from "@openai/code_mode";
|
||||
|
||||
|
|
@ -569,7 +566,7 @@ add_content("stored");
|
|||
assert_ne!(
|
||||
first_success,
|
||||
Some(false),
|
||||
"code_mode store call failed unexpectedly: {first_output}"
|
||||
"exec store call failed unexpectedly: {first_output}"
|
||||
);
|
||||
assert_eq!(first_output, "stored");
|
||||
|
||||
|
|
@ -579,7 +576,7 @@ add_content("stored");
|
|||
ev_response_created("resp-3"),
|
||||
ev_custom_tool_call(
|
||||
"call-2",
|
||||
"code_mode",
|
||||
"exec",
|
||||
r#"
|
||||
import { load } from "openai/code_mode";
|
||||
|
||||
|
|
@ -607,7 +604,7 @@ add_content(JSON.stringify(load("nb")));
|
|||
assert_ne!(
|
||||
second_success,
|
||||
Some(false),
|
||||
"code_mode load call failed unexpectedly: {second_output}"
|
||||
"exec load call failed unexpectedly: {second_output}"
|
||||
);
|
||||
let loaded: Value = serde_json::from_str(&second_output)?;
|
||||
assert_eq!(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue