code_mode: Move exec params from runtime declarations to @pragma (#14511)
This change moves code_mode exec session settings out of the runtime API
and into an optional first-line pragma, so instead of calling runtime
helpers like set_yield_time() or set_max_output_tokens_per_exec_call(),
the model can write // @exec: {"yield_time_ms": ...,
"max_output_tokens": ...} at the top of the freeform exec source. Rust
now parses that pragma before building the source, validates it, and
passes the values directly in the exec start message to the code-mode
broker, which applies them at session start without any worker-runtime
mutation path. The @openai/code_mode module no longer exposes those
setter functions, the docs and grammar were updated to describe the
pragma form, and the existing code_mode tests were converted to use
pragma-based configuration instead.
This commit is contained in:
parent
1a363d5fcf
commit
0daffe667a
9 changed files with 235 additions and 99 deletions
|
|
@ -29,11 +29,6 @@ Object.defineProperty(globalThis, '__codexContentItems', {
|
|||
defineGlobal('ALL_TOOLS', __codexRuntime.ALL_TOOLS);
|
||||
defineGlobal('image', __codexRuntime.image);
|
||||
defineGlobal('load', __codexRuntime.load);
|
||||
defineGlobal(
|
||||
'set_max_output_tokens_per_exec_call',
|
||||
__codexRuntime.set_max_output_tokens_per_exec_call
|
||||
);
|
||||
defineGlobal('set_yield_time', __codexRuntime.set_yield_time);
|
||||
defineGlobal('store', __codexRuntime.store);
|
||||
defineGlobal('text', __codexRuntime.text);
|
||||
defineGlobal('tools', __codexRuntime.tools);
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
## exec
|
||||
- Runs raw JavaScript in an isolated context (no Node, no file system, or network access, no console).
|
||||
- Send raw JavaScript source text, not JSON, quoted strings, or markdown code fences.
|
||||
- You may optionally start the tool input with a first-line pragma like `// @exec: {"yield_time_ms": 10000, "max_output_tokens": 1000}`.
|
||||
- `yield_time_ms` asks `exec` to yield early after that many milliseconds if the script is still running.
|
||||
- `max_output_tokens` sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
|
||||
- All nested tools are available on the global `tools` object, for example `await tools.exec_command(...)`. Tool names are exposed as normalized JavaScript identifiers, for example `await tools.mcp__ologs__get_profile(...)`.
|
||||
- Tool methods take either string or object as parameter.
|
||||
- They return either a structured value or a string based on the description above.
|
||||
|
|
@ -8,9 +11,7 @@
|
|||
- Global helpers:
|
||||
- `text(value: string | number | boolean | undefined | null)`: Appends a text item and returns it. Non-string values are stringified with `JSON.stringify(...)` when possible.
|
||||
- `image(imageUrl: string)`: Appends an image item and returns it. `imageUrl` can be an HTTPS URL or a base64-encoded `data:` URL.
|
||||
- `store(key: string, value: any)`: stores a serializeable value under a string key for later `exec` calls in the same session.
|
||||
- `store(key: string, value: any)`: stores a serializable value under a string key for later `exec` calls in the same session.
|
||||
- `load(key: string)`: returns the stored value for a string key, or `undefined` if it is missing.
|
||||
- `ALL_TOOLS`: metadata for the enabled nested tools as `{ name, description }` entries.
|
||||
- `set_max_output_tokens_per_exec_call(value)`: sets the token budget for direct `exec` results. By default the result is truncated to 10000 tokens.
|
||||
- `set_yield_time(value)`: asks `exec` to yield early after that many milliseconds if the script is still running.
|
||||
- `yield_control()`: yields the accumulated output to the model immediately while the script keeps running.
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use async_trait::async_trait;
|
||||
use serde::Deserialize;
|
||||
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
|
|
@ -9,6 +10,7 @@ use crate::tools::context::ToolPayload;
|
|||
use crate::tools::registry::ToolHandler;
|
||||
use crate::tools::registry::ToolKind;
|
||||
|
||||
use super::CODE_MODE_PRAGMA_PREFIX;
|
||||
use super::CodeModeSessionProgress;
|
||||
use super::ExecContext;
|
||||
use super::PUBLIC_TOOL_NAME;
|
||||
|
|
@ -18,6 +20,23 @@ use super::protocol::HostToNodeMessage;
|
|||
use super::protocol::build_source;
|
||||
|
||||
pub struct CodeModeExecuteHandler;
|
||||
/// Largest integer a JavaScript number can hold exactly (2^53 - 1); pragma
/// values above this bound are rejected by `parse_freeform_args`.
const MAX_JS_SAFE_INTEGER: u64 = (1_u64 << 53) - 1;
|
||||
|
||||
/// Settings carried by the optional first-line `// @exec:` pragma of exec input.
///
/// Deserialized from the pragma's JSON object; serde rejects any key other
/// than the two fields declared here.
#[derive(Debug, Default, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct CodeModeExecPragma {
|
||||
/// Milliseconds after which `exec` is asked to yield early; `None` when omitted.
#[serde(default)]
|
||||
yield_time_ms: Option<u64>,
|
||||
/// Token budget for direct `exec` results; `None` when omitted.
#[serde(default)]
|
||||
max_output_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
/// Result of parsing freeform exec input: the JavaScript to run plus any
/// settings taken from a leading pragma line.
#[derive(Debug, PartialEq, Eq)]
|
||||
struct CodeModeExecArgs {
|
||||
/// JavaScript source with the pragma line (if there was one) removed.
code: String,
|
||||
/// Yield time from the pragma, or `None` when no pragma supplied it.
yield_time_ms: Option<u64>,
|
||||
/// Output token budget from the pragma, or `None` when no pragma supplied it.
max_output_tokens: Option<usize>,
|
||||
}
|
||||
|
||||
impl CodeModeExecuteHandler {
|
||||
async fn execute(
|
||||
|
|
@ -26,12 +45,13 @@ impl CodeModeExecuteHandler {
|
|||
turn: std::sync::Arc<TurnContext>,
|
||||
code: String,
|
||||
) -> Result<FunctionToolOutput, FunctionCallError> {
|
||||
let args = parse_freeform_args(&code)?;
|
||||
let exec = ExecContext { session, turn };
|
||||
let enabled_tools = build_enabled_tools(&exec).await;
|
||||
let service = &exec.session.services.code_mode_service;
|
||||
let stored_values = service.stored_values().await;
|
||||
let source =
|
||||
build_source(&code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
|
||||
build_source(&args.code, &enabled_tools).map_err(FunctionCallError::RespondToModel)?;
|
||||
let cell_id = service.allocate_cell_id().await;
|
||||
let request_id = service.allocate_request_id().await;
|
||||
let process_slot = service
|
||||
|
|
@ -46,6 +66,8 @@ impl CodeModeExecuteHandler {
|
|||
enabled_tools,
|
||||
stored_values,
|
||||
source,
|
||||
yield_time_ms: args.yield_time_ms,
|
||||
max_output_tokens: args.max_output_tokens,
|
||||
};
|
||||
let result = {
|
||||
let mut process_slot = process_slot;
|
||||
|
|
@ -72,6 +94,91 @@ impl CodeModeExecuteHandler {
|
|||
}
|
||||
}
|
||||
|
||||
fn parse_freeform_args(input: &str) -> Result<CodeModeExecArgs, FunctionCallError> {
|
||||
if input.trim().is_empty() {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec expects raw JavaScript source text (non-empty). Provide JS only, optionally with first-line `// @exec: {\"yield_time_ms\": 10000, \"max_output_tokens\": 1000}`.".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let mut args = CodeModeExecArgs {
|
||||
code: input.to_string(),
|
||||
yield_time_ms: None,
|
||||
max_output_tokens: None,
|
||||
};
|
||||
|
||||
let mut lines = input.splitn(2, '\n');
|
||||
let first_line = lines.next().unwrap_or_default();
|
||||
let rest = lines.next().unwrap_or_default();
|
||||
let trimmed = first_line.trim_start();
|
||||
let Some(pragma) = trimmed.strip_prefix(CODE_MODE_PRAGMA_PREFIX) else {
|
||||
return Ok(args);
|
||||
};
|
||||
|
||||
if rest.trim().is_empty() {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec pragma must be followed by JavaScript source on subsequent lines".to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let directive = pragma.trim();
|
||||
if directive.is_empty() {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec pragma must be a JSON object with supported fields `yield_time_ms` and `max_output_tokens`"
|
||||
.to_string(),
|
||||
));
|
||||
}
|
||||
|
||||
let value: serde_json::Value = serde_json::from_str(directive).map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"exec pragma must be valid JSON with supported fields `yield_time_ms` and `max_output_tokens`: {err}"
|
||||
))
|
||||
})?;
|
||||
let object = value.as_object().ok_or_else(|| {
|
||||
FunctionCallError::RespondToModel(
|
||||
"exec pragma must be a JSON object with supported fields `yield_time_ms` and `max_output_tokens`"
|
||||
.to_string(),
|
||||
)
|
||||
})?;
|
||||
for key in object.keys() {
|
||||
match key.as_str() {
|
||||
"yield_time_ms" | "max_output_tokens" => {}
|
||||
_ => {
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"exec pragma only supports `yield_time_ms` and `max_output_tokens`; got `{key}`"
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let pragma: CodeModeExecPragma = serde_json::from_value(value).map_err(|err| {
|
||||
FunctionCallError::RespondToModel(format!(
|
||||
"exec pragma fields `yield_time_ms` and `max_output_tokens` must be non-negative safe integers: {err}"
|
||||
))
|
||||
})?;
|
||||
if pragma
|
||||
.yield_time_ms
|
||||
.is_some_and(|yield_time_ms| yield_time_ms > MAX_JS_SAFE_INTEGER)
|
||||
{
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec pragma field `yield_time_ms` must be a non-negative safe integer".to_string(),
|
||||
));
|
||||
}
|
||||
if pragma.max_output_tokens.is_some_and(|max_output_tokens| {
|
||||
u64::try_from(max_output_tokens)
|
||||
.map(|max_output_tokens| max_output_tokens > MAX_JS_SAFE_INTEGER)
|
||||
.unwrap_or(true)
|
||||
}) {
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"exec pragma field `max_output_tokens` must be a non-negative safe integer".to_string(),
|
||||
));
|
||||
}
|
||||
args.code = rest.to_string();
|
||||
args.yield_time_ms = pragma.yield_time_ms;
|
||||
args.max_output_tokens = pragma.max_output_tokens;
|
||||
Ok(args)
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl ToolHandler for CodeModeExecuteHandler {
|
||||
type Output = FunctionToolOutput;
|
||||
|
|
@ -103,3 +210,7 @@ impl ToolHandler for CodeModeExecuteHandler {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[path = "execute_handler_tests.rs"]
|
||||
mod execute_handler_tests;
|
||||
|
|
|
|||
41
codex-rs/core/src/tools/code_mode/execute_handler_tests.rs
Normal file
41
codex-rs/core/src/tools/code_mode/execute_handler_tests.rs
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
use super::parse_freeform_args;
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
// Input without a pragma line comes back unchanged, with both settings unset.
#[test]
fn parse_freeform_args_without_pragma() {
    let parsed = parse_freeform_args("output_text('ok');").expect("parse args");

    assert_eq!(parsed.max_output_tokens, None);
    assert_eq!(parsed.yield_time_ms, None);
    assert_eq!(parsed.code, "output_text('ok');");
}
|
||||
|
||||
// A well-formed pragma line is stripped from the source and its two settings are returned.
#[test]
fn parse_freeform_args_with_pragma() {
    let parsed = parse_freeform_args(
        "// @exec: {\"yield_time_ms\": 15000, \"max_output_tokens\": 2000}\noutput_text('ok');",
    )
    .expect("parse args");

    assert_eq!(parsed.max_output_tokens, Some(2_000));
    assert_eq!(parsed.yield_time_ms, Some(15_000));
    assert_eq!(parsed.code, "output_text('ok');");
}
|
||||
|
||||
// A pragma key outside the supported pair is rejected and named in the error text.
#[test]
fn parse_freeform_args_rejects_unknown_key() {
    let message = parse_freeform_args("// @exec: {\"nope\": 1}\noutput_text('ok');")
        .expect_err("expected error")
        .to_string();

    assert_eq!(
        message,
        "exec pragma only supports `yield_time_ms` and `max_output_tokens`; got `nope`"
    );
}
|
||||
|
||||
// A pragma line with no JavaScript after it is an error.
#[test]
fn parse_freeform_args_rejects_missing_source() {
    let message = parse_freeform_args("// @exec: {\"yield_time_ms\": 10}")
        .expect_err("expected error")
        .to_string();

    assert_eq!(
        message,
        "exec pragma must be followed by JavaScript source on subsequent lines"
    );
}
|
||||
|
|
@ -33,6 +33,7 @@ const CODE_MODE_RUNNER_SOURCE: &str = include_str!("runner.cjs");
|
|||
const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
|
||||
const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
|
||||
const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");
|
||||
const CODE_MODE_PRAGMA_PREFIX: &str = "// @exec:";
|
||||
|
||||
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
|
||||
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
|
||||
|
|
@ -222,6 +223,7 @@ fn enabled_tool_from_spec(spec: ToolSpec) -> Option<protocol::EnabledTool> {
|
|||
}
|
||||
|
||||
let reference = code_mode_tool_reference(&tool_name);
|
||||
let global_name = normalize_code_mode_identifier(&tool_name);
|
||||
let (description, kind) = match spec {
|
||||
ToolSpec::Function(tool) => (tool.description, protocol::CodeModeToolKind::Function),
|
||||
ToolSpec::Freeform(tool) => (tool.description, protocol::CodeModeToolKind::Freeform),
|
||||
|
|
@ -234,8 +236,8 @@ fn enabled_tool_from_spec(spec: ToolSpec) -> Option<protocol::EnabledTool> {
|
|||
};
|
||||
|
||||
Some(protocol::EnabledTool {
|
||||
global_name: normalize_code_mode_identifier(&tool_name),
|
||||
tool_name,
|
||||
global_name,
|
||||
module_path: reference.module_path,
|
||||
namespace: reference.namespace,
|
||||
name: normalize_code_mode_identifier(&reference.tool_key),
|
||||
|
|
|
|||
|
|
@ -46,6 +46,8 @@ pub(super) enum HostToNodeMessage {
|
|||
enabled_tools: Vec<EnabledTool>,
|
||||
stored_values: HashMap<String, JsonValue>,
|
||||
source: String,
|
||||
yield_time_ms: Option<u64>,
|
||||
max_output_tokens: Option<usize>,
|
||||
},
|
||||
Poll {
|
||||
request_id: String,
|
||||
|
|
|
|||
|
|
@ -47,22 +47,6 @@ function codeModeWorkerMain() {
|
|||
const vm = require('node:vm');
|
||||
const { SourceTextModule, SyntheticModule } = vm;
|
||||
|
||||
const DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL = 10000;
|
||||
|
||||
// Validates an exec output token budget: hands the value back unchanged when
// it is a non-negative safe integer, and throws a TypeError for anything else.
function normalizeMaxOutputTokensPerExecCall(value) {
  const isValidBudget = Number.isSafeInteger(value) && value >= 0;
  if (isValidBudget) {
    return value;
  }
  throw new TypeError('max_output_tokens_per_exec_call must be a non-negative safe integer');
}
|
||||
|
||||
// Validates a yield time: hands the value back unchanged when it is a
// non-negative safe integer, and throws a TypeError for anything else.
function normalizeYieldTime(value) {
  const isValidYieldTime = Number.isSafeInteger(value) && value >= 0;
  if (isValidYieldTime) {
    return value;
  }
  throw new TypeError('yield_time must be a non-negative safe integer');
}
|
||||
|
||||
// Turns a thrown value into text: uses its `stack` when that is truthy,
// otherwise stringifies the value itself.
function formatErrorText(error) {
  if (error && error.stack) {
    return String(error.stack);
  }
  return String(error);
}
|
||||
|
|
@ -270,23 +254,6 @@ function codeModeWorkerMain() {
|
|||
ensureContentItems(context).push(item);
|
||||
return item;
|
||||
};
|
||||
// Validates the budget, records it on the worker state, reports it to the
// parent port, and returns the validated value.
const setMaxOutputTokensPerExecCall = (value) => {
  const budget = normalizeMaxOutputTokensPerExecCall(value);
  state.maxOutputTokensPerExecCall = budget;
  const message = { type: 'set_max_output_tokens_per_exec_call', value: budget };
  parentPort.postMessage(message);
  return budget;
};
|
||||
// Validates the yield time, reports it to the parent port, and returns the
// validated value.
const setYieldTime = (value) => {
  const yieldTime = normalizeYieldTime(value);
  const message = { type: 'set_yield_time', value: yieldTime };
  parentPort.postMessage(message);
  return yieldTime;
};
|
||||
// Posts a `yield` message to the parent port.
const yieldControl = () => {
  const message = { type: 'yield' };
  parentPort.postMessage(message);
};
|
||||
|
|
@ -296,8 +263,6 @@ function codeModeWorkerMain() {
|
|||
load,
|
||||
output_image: image,
|
||||
output_text: text,
|
||||
set_max_output_tokens_per_exec_call: setMaxOutputTokensPerExecCall,
|
||||
set_yield_time: setYieldTime,
|
||||
store,
|
||||
text,
|
||||
yield_control: yieldControl,
|
||||
|
|
@ -306,27 +271,12 @@ function codeModeWorkerMain() {
|
|||
|
||||
function createCodeModeModule(context, helpers) {
|
||||
return new SyntheticModule(
|
||||
[
|
||||
'image',
|
||||
'load',
|
||||
'output_text',
|
||||
'output_image',
|
||||
'set_max_output_tokens_per_exec_call',
|
||||
'set_yield_time',
|
||||
'store',
|
||||
'text',
|
||||
'yield_control',
|
||||
],
|
||||
['image', 'load', 'output_text', 'output_image', 'store', 'text', 'yield_control'],
|
||||
function initCodeModeModule() {
|
||||
this.setExport('image', helpers.image);
|
||||
this.setExport('load', helpers.load);
|
||||
this.setExport('output_text', helpers.output_text);
|
||||
this.setExport('output_image', helpers.output_image);
|
||||
this.setExport(
|
||||
'set_max_output_tokens_per_exec_call',
|
||||
helpers.set_max_output_tokens_per_exec_call
|
||||
);
|
||||
this.setExport('set_yield_time', helpers.set_yield_time);
|
||||
this.setExport('store', helpers.store);
|
||||
this.setExport('text', helpers.text);
|
||||
this.setExport('yield_control', helpers.yield_control);
|
||||
|
|
@ -340,8 +290,6 @@ function codeModeWorkerMain() {
|
|||
ALL_TOOLS: createAllToolsMetadata(enabledTools),
|
||||
image: helpers.image,
|
||||
load: helpers.load,
|
||||
set_max_output_tokens_per_exec_call: helpers.set_max_output_tokens_per_exec_call,
|
||||
set_yield_time: helpers.set_yield_time,
|
||||
store: helpers.store,
|
||||
text: helpers.text,
|
||||
tools: createGlobalToolsNamespace(callTool, enabledTools),
|
||||
|
|
@ -475,7 +423,6 @@ function codeModeWorkerMain() {
|
|||
async function main() {
|
||||
const start = workerData ?? {};
|
||||
const state = {
|
||||
maxOutputTokensPerExecCall: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
|
||||
storedValues: cloneJsonValue(start.stored_values ?? {}),
|
||||
};
|
||||
const callTool = createToolCaller();
|
||||
|
|
@ -650,6 +597,10 @@ function sessionWorkerSource() {
|
|||
}
|
||||
|
||||
function startSession(protocol, sessions, start) {
|
||||
const maxOutputTokensPerExecCall =
|
||||
start.max_output_tokens == null
|
||||
? DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL
|
||||
: normalizeMaxOutputTokensPerExecCall(start.max_output_tokens);
|
||||
const session = {
|
||||
completed: false,
|
||||
content_items: [],
|
||||
|
|
@ -657,7 +608,7 @@ function startSession(protocol, sessions, start) {
|
|||
id: start.cell_id,
|
||||
initial_yield_timer: null,
|
||||
initial_yield_triggered: false,
|
||||
max_output_tokens_per_exec_call: DEFAULT_MAX_OUTPUT_TOKENS_PER_EXEC_CALL,
|
||||
max_output_tokens_per_exec_call: maxOutputTokensPerExecCall,
|
||||
pending_result: null,
|
||||
poll_yield_timer: null,
|
||||
request_id: String(start.request_id),
|
||||
|
|
@ -667,7 +618,11 @@ function startSession(protocol, sessions, start) {
|
|||
}),
|
||||
};
|
||||
sessions.set(session.id, session);
|
||||
scheduleInitialYield(protocol, session, session.default_yield_time_ms);
|
||||
const initialYieldTime =
|
||||
start.yield_time_ms == null
|
||||
? session.default_yield_time_ms
|
||||
: normalizeYieldTime(start.yield_time_ms);
|
||||
scheduleInitialYield(protocol, session, initialYieldTime);
|
||||
|
||||
session.worker.on('message', (message) => {
|
||||
void handleWorkerMessage(protocol, sessions, session, message).catch((error) => {
|
||||
|
|
@ -706,16 +661,6 @@ async function handleWorkerMessage(protocol, sessions, session, message) {
|
|||
return;
|
||||
}
|
||||
|
||||
if (message.type === 'set_yield_time') {
|
||||
scheduleInitialYield(protocol, session, normalizeYieldTime(message.value ?? 0));
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.type === 'set_max_output_tokens_per_exec_call') {
|
||||
session.max_output_tokens_per_exec_call = normalizeMaxOutputTokensPerExecCall(message.value);
|
||||
return;
|
||||
}
|
||||
|
||||
if (message.type === 'yield') {
|
||||
void sendYielded(protocol, session);
|
||||
return;
|
||||
|
|
|
|||
|
|
@ -2018,8 +2018,13 @@ fn create_js_repl_reset_tool() -> ToolSpec {
|
|||
|
||||
fn create_code_mode_tool(enabled_tool_names: &[String]) -> ToolSpec {
|
||||
const CODE_MODE_FREEFORM_GRAMMAR: &str = r#"
|
||||
start: source
|
||||
source: /[\s\S]+/
|
||||
start: pragma_source | plain_source
|
||||
pragma_source: PRAGMA_LINE NEWLINE SOURCE
|
||||
plain_source: SOURCE
|
||||
|
||||
PRAGMA_LINE: /[ \t]*\/\/ @exec:[^\r\n]*/
|
||||
NEWLINE: /\r?\n/
|
||||
SOURCE: /[\s\S]+/
|
||||
"#;
|
||||
|
||||
ToolSpec::Freeform(FreeformTool {
|
||||
|
|
|
|||
|
|
@ -28,11 +28,13 @@ use std::time::Instant;
|
|||
use wiremock::MockServer;
|
||||
|
||||
fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
|
||||
req.custom_tool_call_output(call_id)
|
||||
.get("output")
|
||||
.and_then(Value::as_array)
|
||||
.expect("custom tool output should be serialized as content items")
|
||||
.clone()
|
||||
match req.custom_tool_call_output(call_id).get("output") {
|
||||
Some(Value::Array(items)) => items.clone(),
|
||||
Some(Value::String(text)) => {
|
||||
vec![serde_json::json!({ "type": "input_text", "text": text })]
|
||||
}
|
||||
_ => panic!("custom tool output should be serialized as text or content items"),
|
||||
}
|
||||
}
|
||||
|
||||
fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
|
||||
|
|
@ -332,9 +334,7 @@ async fn code_mode_can_truncate_final_result_with_configured_budget() -> Result<
|
|||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"use exec to truncate the final result",
|
||||
r#"
|
||||
set_max_output_tokens_per_exec_call(6);
|
||||
|
||||
r#"// @exec: {"max_output_tokens": 6}
|
||||
text(JSON.stringify(await tools.exec_command({
|
||||
cmd: "printf 'token one token two token three token four token five token six token seven'",
|
||||
max_output_tokens: 100
|
||||
|
|
@ -427,7 +427,7 @@ async fn code_mode_can_yield_and_resume_with_exec_wait() -> Result<()> {
|
|||
let code = format!(
|
||||
r#"
|
||||
text("phase 1");
|
||||
set_yield_time(10);
|
||||
yield_control();
|
||||
{phase_2_wait}
|
||||
text("phase 2");
|
||||
{phase_3_wait}
|
||||
|
|
@ -566,9 +566,8 @@ async fn code_mode_yield_timeout_works_for_busy_loop() -> Result<()> {
|
|||
});
|
||||
let test = builder.build(&server).await?;
|
||||
|
||||
let code = r#"
|
||||
let code = r#"// @exec: {"yield_time_ms": 100}
|
||||
text("phase 1");
|
||||
set_yield_time(10);
|
||||
while (true) {}
|
||||
"#;
|
||||
|
||||
|
|
@ -668,7 +667,7 @@ async fn code_mode_can_run_multiple_yielded_sessions() -> Result<()> {
|
|||
let session_a_code = format!(
|
||||
r#"
|
||||
text("session a start");
|
||||
set_yield_time(10);
|
||||
yield_control();
|
||||
{session_a_wait}
|
||||
text("session a done");
|
||||
"#
|
||||
|
|
@ -676,7 +675,7 @@ text("session a done");
|
|||
let session_b_code = format!(
|
||||
r#"
|
||||
text("session b start");
|
||||
set_yield_time(10);
|
||||
yield_control();
|
||||
{session_b_wait}
|
||||
text("session b done");
|
||||
"#
|
||||
|
|
@ -834,7 +833,7 @@ async fn code_mode_exec_wait_can_terminate_and_continue() -> Result<()> {
|
|||
let code = format!(
|
||||
r#"
|
||||
text("phase 1");
|
||||
set_yield_time(10);
|
||||
yield_control();
|
||||
{termination_wait}
|
||||
text("phase 2");
|
||||
"#
|
||||
|
|
@ -1028,7 +1027,7 @@ async fn code_mode_exec_wait_terminate_returns_completed_session_if_it_finished_
|
|||
let session_a_code = format!(
|
||||
r#"
|
||||
text("session a start");
|
||||
set_yield_time(10);
|
||||
yield_control();
|
||||
{session_a_wait}
|
||||
text("session a done");
|
||||
await tools.exec_command({{ cmd: {session_a_done_command:?} }});
|
||||
|
|
@ -1037,7 +1036,7 @@ await tools.exec_command({{ cmd: {session_a_done_command:?} }});
|
|||
let session_b_code = format!(
|
||||
r#"
|
||||
text("session b start");
|
||||
set_yield_time(10);
|
||||
yield_control();
|
||||
{session_b_wait}
|
||||
text("session b done");
|
||||
"#
|
||||
|
|
@ -1308,10 +1307,9 @@ async fn code_mode_exec_wait_uses_its_own_max_tokens_budget() -> Result<()> {
|
|||
let completion_wait = wait_for_file_source(&completion_gate)?;
|
||||
|
||||
let code = format!(
|
||||
r#"
|
||||
r#"// @exec: {{"max_output_tokens": 100}}
|
||||
text("phase 1");
|
||||
set_max_output_tokens_per_exec_call(100);
|
||||
set_yield_time(10);
|
||||
yield_control();
|
||||
{completion_wait}
|
||||
text("token one token two token three token four token five token six token seven");
|
||||
"#
|
||||
|
|
@ -1630,6 +1628,42 @@ contentLength=0"
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Runs a code-mode script via `run_code_mode_turn_with_rmcp` that reports
/// whether `tools.exec_command` and `tools.mcp__rmcp__echo` are functions on
/// the global `tools` object, then checks the JSON text the script emitted.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exposes_namespaced_mcp_tools_on_global_tools_object() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let code = r#"
|
||||
text(JSON.stringify({
|
||||
hasExecCommand: typeof tools.exec_command === "function",
|
||||
hasNamespacedEcho: typeof tools.mcp__rmcp__echo === "function",
|
||||
}));
|
||||
"#;
|
||||
|
||||
let (_test, second_mock) =
|
||||
run_code_mode_turn_with_rmcp(&server, "use exec to inspect the global tools object", code)
|
||||
.await?;
|
||||
|
||||
let req = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_body_and_success(&req, "call-1");
|
||||
// Only an explicit `Some(false)` counts as failure; a missing success flag is accepted.
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"exec global tools inspection failed unexpectedly: {output}"
|
||||
);
|
||||
|
||||
let parsed: Value = serde_json::from_str(&output)?;
|
||||
// `hasExecCommand` is expected to be false on Windows builds — presumably
// `exec_command` is not enabled there; confirm against the tool setup.
assert_eq!(
|
||||
parsed,
|
||||
serde_json::json!({
|
||||
"hasExecCommand": !cfg!(windows),
|
||||
"hasNamespacedEcho": true,
|
||||
})
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exposes_normalized_illegal_mcp_tool_names() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
@ -1736,6 +1770,7 @@ text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
|
|||
"WeakSet",
|
||||
"WebAssembly",
|
||||
"__codexContentItems",
|
||||
"add_content",
|
||||
"console",
|
||||
"decodeURI",
|
||||
"decodeURIComponent",
|
||||
|
|
@ -1750,8 +1785,6 @@ text(JSON.stringify(Object.getOwnPropertyNames(globalThis).sort()));
|
|||
"load",
|
||||
"parseFloat",
|
||||
"parseInt",
|
||||
"set_max_output_tokens_per_exec_call",
|
||||
"set_yield_time",
|
||||
"store",
|
||||
"text",
|
||||
"tools",
|
||||
|
|
@ -1918,6 +1951,7 @@ structuredContent=null"
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_store_and_load_values_across_turns() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue