Add code_mode_only feature (#14617)
Summary: (1) add the `code_mode_only` feature flag and config schema entry, and wire its dependency on `code_mode`; (2) update the code mode tool descriptions to list nested tools with detailed headers; (3) restrict the available tools for prompt and exec descriptions when `code_mode_only` is enabled, and test the behavior. Testing: not run (not requested).
This commit is contained in:
parent
ef37d313c6
commit
477a2dd345
10 changed files with 302 additions and 31 deletions
|
|
@ -339,6 +339,9 @@
|
|||
"code_mode": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"code_mode_only": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"codex_git_commit": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
|
@ -1880,6 +1883,9 @@
|
|||
"code_mode": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"code_mode_only": {
|
||||
"type": "boolean"
|
||||
},
|
||||
"codex_git_commit": {
|
||||
"type": "boolean"
|
||||
},
|
||||
|
|
|
|||
|
|
@ -6175,7 +6175,7 @@ fn build_prompt(
|
|||
) -> Prompt {
|
||||
Prompt {
|
||||
input,
|
||||
tools: router.specs(),
|
||||
tools: router.model_visible_specs(),
|
||||
parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls,
|
||||
base_instructions,
|
||||
personality: turn_context.personality,
|
||||
|
|
|
|||
|
|
@ -107,7 +107,7 @@ async fn run_remote_compact_task_inner_impl(
|
|||
.await?;
|
||||
let prompt = Prompt {
|
||||
input: prompt_input,
|
||||
tools: tool_router.specs(),
|
||||
tools: tool_router.model_visible_specs(),
|
||||
parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls,
|
||||
base_instructions,
|
||||
personality: turn_context.personality,
|
||||
|
|
|
|||
|
|
@ -87,6 +87,8 @@ pub enum Feature {
|
|||
JsRepl,
|
||||
/// Enable a minimal JavaScript mode backed by Node's built-in vm runtime.
|
||||
CodeMode,
|
||||
/// Restrict model-visible tools to code mode entrypoints (`exec`, `exec_wait`).
|
||||
CodeModeOnly,
|
||||
/// Only expose js_repl tools directly to the model.
|
||||
JsReplToolsOnly,
|
||||
/// Use the single unified PTY-backed exec tool.
|
||||
|
|
@ -429,6 +431,9 @@ impl Features {
|
|||
if self.enabled(Feature::SpawnCsv) && !self.enabled(Feature::Collab) {
|
||||
self.enable(Feature::Collab);
|
||||
}
|
||||
if self.enabled(Feature::CodeModeOnly) && !self.enabled(Feature::CodeMode) {
|
||||
self.enable(Feature::CodeMode);
|
||||
}
|
||||
if self.enabled(Feature::JsReplToolsOnly) && !self.enabled(Feature::JsRepl) {
|
||||
tracing::warn!("js_repl_tools_only requires js_repl; disabling js_repl_tools_only");
|
||||
self.disable(Feature::JsReplToolsOnly);
|
||||
|
|
@ -558,6 +563,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
|||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::CodeModeOnly,
|
||||
key: "code_mode_only",
|
||||
stage: Stage::UnderDevelopment,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::JsReplToolsOnly,
|
||||
key: "js_repl_tools_only",
|
||||
|
|
|
|||
|
|
@ -58,6 +58,16 @@ fn js_repl_is_experimental_and_user_toggleable() {
|
|||
assert_eq!(Feature::JsRepl.default_enabled(), false);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_only_requires_code_mode() {
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::CodeModeOnly);
|
||||
features.normalize_dependencies();
|
||||
|
||||
assert_eq!(features.enabled(Feature::CodeModeOnly), true);
|
||||
assert_eq!(features.enabled(Feature::CodeMode), true);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn guardian_approval_is_experimental_and_user_toggleable() {
|
||||
let spec = Feature::GuardianApproval.info();
|
||||
|
|
|
|||
|
|
@ -34,9 +34,14 @@ const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
|
|||
const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
|
||||
const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");
|
||||
const CODE_MODE_PRAGMA_PREFIX: &str = "// @exec:";
|
||||
const CODE_MODE_ONLY_PREFACE: &str = "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly";
|
||||
|
||||
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
|
||||
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";
|
||||
|
||||
pub(crate) fn is_code_mode_nested_tool(tool_name: &str) -> bool {
|
||||
tool_name != PUBLIC_TOOL_NAME && tool_name != WAIT_TOOL_NAME
|
||||
}
|
||||
pub(crate) const DEFAULT_EXEC_YIELD_TIME_MS: u64 = 10_000;
|
||||
pub(crate) const DEFAULT_WAIT_YIELD_TIME_MS: u64 = 10_000;
|
||||
|
||||
|
|
@ -62,16 +67,33 @@ enum CodeModeExecutionStatus {
|
|||
Terminated,
|
||||
}
|
||||
|
||||
pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String {
|
||||
let enabled_list = if enabled_tool_names.is_empty() {
|
||||
"none".to_string()
|
||||
} else {
|
||||
enabled_tool_names.join(", ")
|
||||
};
|
||||
format!(
|
||||
"{}\n- Enabled nested tools: {enabled_list}.",
|
||||
CODE_MODE_DESCRIPTION_TEMPLATE.trim_end()
|
||||
)
|
||||
pub(crate) fn tool_description(enabled_tools: &[(String, String)], code_mode_only: bool) -> String {
|
||||
let description_template = CODE_MODE_DESCRIPTION_TEMPLATE.trim_end();
|
||||
if !code_mode_only {
|
||||
return description_template.to_string();
|
||||
}
|
||||
|
||||
let mut sections = vec![
|
||||
CODE_MODE_ONLY_PREFACE.to_string(),
|
||||
description_template.to_string(),
|
||||
];
|
||||
|
||||
if !enabled_tools.is_empty() {
|
||||
let nested_tool_reference = enabled_tools
|
||||
.iter()
|
||||
.map(|(name, nested_description)| {
|
||||
let global_name = normalize_code_mode_identifier(name);
|
||||
format!(
|
||||
"### `{global_name}` (`{name}`)\n{}",
|
||||
nested_description.trim()
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n");
|
||||
sections.push(nested_tool_reference);
|
||||
}
|
||||
|
||||
sections.join("\n\n")
|
||||
}
|
||||
|
||||
pub(crate) fn wait_tool_description() -> &'static str {
|
||||
|
|
@ -218,7 +240,7 @@ async fn build_enabled_tools(exec: &ExecContext) -> Vec<protocol::EnabledTool> {
|
|||
|
||||
fn enabled_tool_from_spec(spec: ToolSpec) -> Option<protocol::EnabledTool> {
|
||||
let tool_name = spec.name().to_string();
|
||||
if tool_name == PUBLIC_TOOL_NAME || tool_name == WAIT_TOOL_NAME {
|
||||
if !is_code_mode_nested_tool(&tool_name) {
|
||||
return None;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ use crate::codex::TurnContext;
|
|||
use crate::function_tool::FunctionCallError;
|
||||
use crate::mcp_connection_manager::ToolInfo;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::code_mode::is_code_mode_nested_tool;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
|
|
@ -39,6 +40,7 @@ pub struct ToolCall {
|
|||
pub struct ToolRouter {
|
||||
registry: ToolRegistry,
|
||||
specs: Vec<ConfiguredToolSpec>,
|
||||
model_visible_specs: Vec<ToolSpec>,
|
||||
}
|
||||
|
||||
pub(crate) struct ToolRouterParams<'a> {
|
||||
|
|
@ -64,8 +66,29 @@ impl ToolRouter {
|
|||
dynamic_tools,
|
||||
);
|
||||
let (specs, registry) = builder.build();
|
||||
let model_visible_specs = if config.code_mode_only_enabled {
|
||||
specs
|
||||
.iter()
|
||||
.filter_map(|configured_tool| {
|
||||
if !is_code_mode_nested_tool(configured_tool.spec.name()) {
|
||||
Some(configured_tool.spec.clone())
|
||||
} else {
|
||||
None
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
specs
|
||||
.iter()
|
||||
.map(|configured_tool| configured_tool.spec.clone())
|
||||
.collect()
|
||||
};
|
||||
|
||||
Self { registry, specs }
|
||||
Self {
|
||||
registry,
|
||||
specs,
|
||||
model_visible_specs,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn specs(&self) -> Vec<ToolSpec> {
|
||||
|
|
@ -75,6 +98,10 @@ impl ToolRouter {
|
|||
.collect()
|
||||
}
|
||||
|
||||
pub fn model_visible_specs(&self) -> Vec<ToolSpec> {
|
||||
self.model_visible_specs.clone()
|
||||
}
|
||||
|
||||
pub fn find_spec(&self, tool_name: &str) -> Option<ToolSpec> {
|
||||
self.specs
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
|
|||
use crate::original_image_detail::can_request_original_image_detail;
|
||||
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
|
||||
use crate::tools::code_mode::WAIT_TOOL_NAME;
|
||||
use crate::tools::code_mode::is_code_mode_nested_tool;
|
||||
use crate::tools::code_mode::tool_description as code_mode_tool_description;
|
||||
use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description;
|
||||
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
|
||||
|
|
@ -226,6 +227,7 @@ pub(crate) struct ToolsConfig {
|
|||
pub exec_permission_approvals_enabled: bool,
|
||||
pub request_permissions_tool_enabled: bool,
|
||||
pub code_mode_enabled: bool,
|
||||
pub code_mode_only_enabled: bool,
|
||||
pub js_repl_enabled: bool,
|
||||
pub js_repl_tools_only: bool,
|
||||
pub can_request_original_image_detail: bool,
|
||||
|
|
@ -274,6 +276,7 @@ impl ToolsConfig {
|
|||
} = params;
|
||||
let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
|
||||
let include_code_mode = features.enabled(Feature::CodeMode);
|
||||
let include_code_mode_only = include_code_mode && features.enabled(Feature::CodeModeOnly);
|
||||
let include_js_repl = features.enabled(Feature::JsRepl);
|
||||
let include_js_repl_tools_only =
|
||||
include_js_repl && features.enabled(Feature::JsReplToolsOnly);
|
||||
|
|
@ -363,6 +366,7 @@ impl ToolsConfig {
|
|||
exec_permission_approvals_enabled,
|
||||
request_permissions_tool_enabled,
|
||||
code_mode_enabled: include_code_mode,
|
||||
code_mode_only_enabled: include_code_mode_only,
|
||||
js_repl_enabled: include_js_repl,
|
||||
js_repl_tools_only: include_js_repl_tools_only,
|
||||
can_request_original_image_detail: include_original_image_detail,
|
||||
|
|
@ -394,6 +398,7 @@ impl ToolsConfig {
|
|||
pub fn for_code_mode_nested_tools(&self) -> Self {
|
||||
let mut nested = self.clone();
|
||||
nested.code_mode_enabled = false;
|
||||
nested.code_mode_only_enabled = false;
|
||||
nested
|
||||
}
|
||||
}
|
||||
|
|
@ -1995,7 +2000,10 @@ fn create_js_repl_reset_tool() -> ToolSpec {
|
|||
})
|
||||
}
|
||||
|
||||
fn create_code_mode_tool(enabled_tool_names: &[String]) -> ToolSpec {
|
||||
fn create_code_mode_tool(
|
||||
enabled_tools: &[(String, String)],
|
||||
code_mode_only_enabled: bool,
|
||||
) -> ToolSpec {
|
||||
const CODE_MODE_FREEFORM_GRAMMAR: &str = r#"
|
||||
start: pragma_source | plain_source
|
||||
pragma_source: PRAGMA_LINE NEWLINE SOURCE
|
||||
|
|
@ -2008,7 +2016,7 @@ SOURCE: /[\s\S]+/
|
|||
|
||||
ToolSpec::Freeform(FreeformTool {
|
||||
name: PUBLIC_TOOL_NAME.to_string(),
|
||||
description: code_mode_tool_description(enabled_tool_names),
|
||||
description: code_mode_tool_description(enabled_tools, code_mode_only_enabled),
|
||||
format: FreeformToolFormat {
|
||||
r#type: "grammar".to_string(),
|
||||
syntax: "lark".to_string(),
|
||||
|
|
@ -2017,12 +2025,6 @@ SOURCE: /[\s\S]+/
|
|||
})
|
||||
}
|
||||
|
||||
fn is_code_mode_nested_tool(spec: &ToolSpec) -> bool {
|
||||
spec.name() != PUBLIC_TOOL_NAME
|
||||
&& spec.name() != WAIT_TOOL_NAME
|
||||
&& matches!(spec, ToolSpec::Function(_) | ToolSpec::Freeform(_))
|
||||
}
|
||||
|
||||
fn create_list_mcp_resources_tool() -> ToolSpec {
|
||||
let properties = BTreeMap::from([
|
||||
(
|
||||
|
|
@ -2475,17 +2477,22 @@ pub(crate) fn build_specs_with_discoverable_tools(
|
|||
dynamic_tools,
|
||||
)
|
||||
.build();
|
||||
let mut enabled_tool_names = nested_specs
|
||||
let mut enabled_tools = nested_specs
|
||||
.into_iter()
|
||||
.map(|spec| spec.spec)
|
||||
.filter(is_code_mode_nested_tool)
|
||||
.map(|spec| spec.name().to_string())
|
||||
.filter_map(|spec| {
|
||||
let (name, description) = match augment_tool_spec_for_code_mode(spec.spec, true) {
|
||||
ToolSpec::Function(tool) => (tool.name, tool.description),
|
||||
ToolSpec::Freeform(tool) => (tool.name, tool.description),
|
||||
_ => return None,
|
||||
};
|
||||
is_code_mode_nested_tool(&name).then_some((name, description))
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
enabled_tool_names.sort();
|
||||
enabled_tool_names.dedup();
|
||||
enabled_tools.sort_by(|left, right| left.0.cmp(&right.0));
|
||||
enabled_tools.dedup_by(|left, right| left.0 == right.0);
|
||||
push_tool_spec(
|
||||
&mut builder,
|
||||
create_code_mode_tool(&enabled_tool_names),
|
||||
create_code_mode_tool(&enabled_tools, config.code_mode_only_enabled),
|
||||
false,
|
||||
config.code_mode_enabled,
|
||||
);
|
||||
|
|
|
|||
|
|
@ -2,7 +2,9 @@ use crate::client_common::tools::FreeformTool;
|
|||
use crate::config::test_config;
|
||||
use crate::models_manager::manager::ModelsManager;
|
||||
use crate::models_manager::model_info::with_config_overrides;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::registry::ConfiguredToolSpec;
|
||||
use crate::tools::router::ToolRouterParams;
|
||||
use codex_app_server_protocol::AppInfo;
|
||||
use codex_protocol::openai_models::InputModality;
|
||||
use codex_protocol::openai_models::ModelInfo;
|
||||
|
|
@ -933,8 +935,20 @@ fn assert_model_tools(
|
|||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
windows_sandbox_level: WindowsSandboxLevel::Disabled,
|
||||
});
|
||||
let (tools, _) = build_specs(&tools_config, None, None, &[]).build();
|
||||
let tool_names = tools.iter().map(|t| t.spec.name()).collect::<Vec<_>>();
|
||||
let router = ToolRouter::from_config(
|
||||
&tools_config,
|
||||
ToolRouterParams {
|
||||
mcp_tools: None,
|
||||
app_tools: None,
|
||||
discoverable_tools: None,
|
||||
dynamic_tools: &[],
|
||||
},
|
||||
);
|
||||
let model_visible_specs = router.model_visible_specs();
|
||||
let tool_names = model_visible_specs
|
||||
.iter()
|
||||
.map(ToolSpec::name)
|
||||
.collect::<Vec<_>>();
|
||||
assert_eq!(&tool_names, &expected_tools,);
|
||||
}
|
||||
|
||||
|
|
@ -2488,6 +2502,83 @@ fn code_mode_augments_mcp_tool_descriptions_with_namespaced_sample() {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_only_restricts_model_tools_to_exec_tools() {
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::CodeMode);
|
||||
features.enable(Feature::CodeModeOnly);
|
||||
|
||||
assert_model_tools(
|
||||
"gpt-5.1-codex",
|
||||
&features,
|
||||
Some(WebSearchMode::Live),
|
||||
&["exec", "exec_wait"],
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_only_exec_description_includes_full_nested_tool_details() {
|
||||
let config = test_config();
|
||||
let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::CodeMode);
|
||||
features.enable(Feature::CodeModeOnly);
|
||||
let available_models = Vec::new();
|
||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_info: &model_info,
|
||||
available_models: &available_models,
|
||||
features: &features,
|
||||
web_search_mode: Some(WebSearchMode::Cached),
|
||||
session_source: SessionSource::Cli,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
windows_sandbox_level: WindowsSandboxLevel::Disabled,
|
||||
});
|
||||
|
||||
let (tools, _) = build_specs(&tools_config, None, None, &[]).build();
|
||||
let ToolSpec::Freeform(FreeformTool { description, .. }) = &find_tool(&tools, "exec").spec
|
||||
else {
|
||||
panic!("expected freeform tool");
|
||||
};
|
||||
|
||||
assert!(!description.contains("Enabled nested tools:"));
|
||||
assert!(!description.contains("Nested tool reference:"));
|
||||
assert!(description.starts_with(
|
||||
"Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly"
|
||||
));
|
||||
assert!(description.contains("### `update_plan` (`update_plan`)"));
|
||||
assert!(description.contains("### `view_image` (`view_image`)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn code_mode_exec_description_omits_nested_tool_details_when_not_code_mode_only() {
|
||||
let config = test_config();
|
||||
let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);
|
||||
let mut features = Features::with_defaults();
|
||||
features.enable(Feature::CodeMode);
|
||||
let available_models = Vec::new();
|
||||
let tools_config = ToolsConfig::new(&ToolsConfigParams {
|
||||
model_info: &model_info,
|
||||
available_models: &available_models,
|
||||
features: &features,
|
||||
web_search_mode: Some(WebSearchMode::Cached),
|
||||
session_source: SessionSource::Cli,
|
||||
sandbox_policy: &SandboxPolicy::DangerFullAccess,
|
||||
windows_sandbox_level: WindowsSandboxLevel::Disabled,
|
||||
});
|
||||
|
||||
let (tools, _) = build_specs(&tools_config, None, None, &[]).build();
|
||||
let ToolSpec::Freeform(FreeformTool { description, .. }) = &find_tool(&tools, "exec").spec
|
||||
else {
|
||||
panic!("expected freeform tool");
|
||||
};
|
||||
|
||||
assert!(!description.starts_with(
|
||||
"Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly"
|
||||
));
|
||||
assert!(!description.contains("### `update_plan` (`update_plan`)"));
|
||||
assert!(!description.contains("### `view_image` (`view_image`)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn chat_tools_include_top_level_name() {
|
||||
let properties =
|
||||
|
|
|
|||
|
|
@ -37,6 +37,23 @@ fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value>
|
|||
}
|
||||
}
|
||||
|
||||
fn tool_names(body: &Value) -> Vec<String> {
|
||||
body.get("tools")
|
||||
.and_then(Value::as_array)
|
||||
.map(|tools| {
|
||||
tools
|
||||
.iter()
|
||||
.filter_map(|tool| {
|
||||
tool.get("name")
|
||||
.or_else(|| tool.get("type"))
|
||||
.and_then(Value::as_str)
|
||||
.map(str::to_string)
|
||||
})
|
||||
.collect()
|
||||
})
|
||||
.unwrap_or_default()
|
||||
}
|
||||
|
||||
fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
|
||||
match req.function_call_output(call_id).get("output") {
|
||||
Some(Value::Array(items)) => items.clone(),
|
||||
|
|
@ -233,6 +250,86 @@ text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marke
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let resp_mock = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let _ = config.features.enable(Feature::CodeModeOnly);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
test.submit_turn("list tools in code mode only").await?;
|
||||
|
||||
let first_body = resp_mock.single_request().body_json();
|
||||
assert_eq!(
|
||||
tool_names(&first_body),
|
||||
vec!["exec".to_string(), "exec_wait".to_string()]
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_only_can_call_nested_tools() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_custom_tool_call(
|
||||
"call-1",
|
||||
"exec",
|
||||
r#"
|
||||
const output = await tools.exec_command({ cmd: "printf code_mode_only_nested_tool_marker" });
|
||||
text(output.output);
|
||||
"#,
|
||||
),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
let follow_up_mock = responses::mount_sse_once(
|
||||
&server,
|
||||
sse(vec![
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
let _ = config.features.enable(Feature::CodeModeOnly);
|
||||
});
|
||||
let test = builder.build(&server).await?;
|
||||
test.submit_turn("use exec to run nested tool in code mode only")
|
||||
.await?;
|
||||
|
||||
let request = follow_up_mock.single_request();
|
||||
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"code_mode_only nested tool call failed unexpectedly: {output}"
|
||||
);
|
||||
assert_eq!(output, "code_mode_only_nested_tool_marker");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "flaky on windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue