From 477a2dd3458be962178abc891422215bf3c22f52 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Fri, 13 Mar 2026 13:30:19 -0700 Subject: [PATCH] Add code_mode_only feature (#14617) Summary - add the code_mode_only feature flag/config schema and wire its dependency on code_mode - update code mode tool descriptions to list nested tools with detailed headers - restrict available tools for prompt and exec descriptions when code_mode_only is enabled and test the behavior Testing - Not run (not requested) --- codex-rs/core/config.schema.json | 6 ++ codex-rs/core/src/codex.rs | 2 +- codex-rs/core/src/compact_remote.rs | 2 +- codex-rs/core/src/features.rs | 11 +++ codex-rs/core/src/features_tests.rs | 10 +++ codex-rs/core/src/tools/code_mode/mod.rs | 44 ++++++++--- codex-rs/core/src/tools/router.rs | 29 ++++++- codex-rs/core/src/tools/spec.rs | 37 +++++---- codex-rs/core/src/tools/spec_tests.rs | 95 ++++++++++++++++++++++- codex-rs/core/tests/suite/code_mode.rs | 97 ++++++++++++++++++++++++ 10 files changed, 302 insertions(+), 31 deletions(-) diff --git a/codex-rs/core/config.schema.json b/codex-rs/core/config.schema.json index 0f1768937..2681a070b 100644 --- a/codex-rs/core/config.schema.json +++ b/codex-rs/core/config.schema.json @@ -339,6 +339,9 @@ "code_mode": { "type": "boolean" }, + "code_mode_only": { + "type": "boolean" + }, "codex_git_commit": { "type": "boolean" }, @@ -1880,6 +1883,9 @@ "code_mode": { "type": "boolean" }, + "code_mode_only": { + "type": "boolean" + }, "codex_git_commit": { "type": "boolean" }, diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 10e8bebf8..cf382d2bf 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -6175,7 +6175,7 @@ fn build_prompt( ) -> Prompt { Prompt { input, - tools: router.specs(), + tools: router.model_visible_specs(), parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls, base_instructions, personality: turn_context.personality, diff --git a/codex-rs/core/src/compact_remote.rs b/codex-rs/core/src/compact_remote.rs index 718166cb0..339065f72 100644 --- a/codex-rs/core/src/compact_remote.rs +++ b/codex-rs/core/src/compact_remote.rs @@ -107,7 +107,7 @@ async fn run_remote_compact_task_inner_impl( .await?; let prompt = Prompt { input: prompt_input, - tools: tool_router.specs(), + tools: tool_router.model_visible_specs(), parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls, base_instructions, personality: turn_context.personality, diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 771fe8083..12abb6fd6 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -87,6 +87,8 @@ pub enum Feature { JsRepl, /// Enable a minimal JavaScript mode backed by Node's built-in vm runtime. CodeMode, + /// Restrict model-visible tools to code mode entrypoints (`exec`, `exec_wait`). + CodeModeOnly, /// Only expose js_repl tools directly to the model. JsReplToolsOnly, /// Use the single unified PTY-backed exec tool. @@ -429,6 +431,9 @@ impl Features { if self.enabled(Feature::SpawnCsv) && !self.enabled(Feature::Collab) { self.enable(Feature::Collab); } + if self.enabled(Feature::CodeModeOnly) && !self.enabled(Feature::CodeMode) { + self.enable(Feature::CodeMode); + } if self.enabled(Feature::JsReplToolsOnly) && !self.enabled(Feature::JsRepl) { tracing::warn!("js_repl_tools_only requires js_repl; disabling js_repl_tools_only"); self.disable(Feature::JsReplToolsOnly); @@ -558,6 +563,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::UnderDevelopment, default_enabled: false, }, + FeatureSpec { + id: Feature::CodeModeOnly, + key: "code_mode_only", + stage: Stage::UnderDevelopment, + default_enabled: false, + }, FeatureSpec { id: Feature::JsReplToolsOnly, key: "js_repl_tools_only", diff --git a/codex-rs/core/src/features_tests.rs b/codex-rs/core/src/features_tests.rs index 895cee1b8..4098279c4 100644 --- a/codex-rs/core/src/features_tests.rs +++ b/codex-rs/core/src/features_tests.rs @@ -58,6 +58,16 @@ fn js_repl_is_experimental_and_user_toggleable() { assert_eq!(Feature::JsRepl.default_enabled(), false); } +#[test] +fn code_mode_only_requires_code_mode() { + let mut features = Features::with_defaults(); + features.enable(Feature::CodeModeOnly); + features.normalize_dependencies(); + + assert_eq!(features.enabled(Feature::CodeModeOnly), true); + assert_eq!(features.enabled(Feature::CodeMode), true); +} + #[test] fn guardian_approval_is_experimental_and_user_toggleable() { let spec = Feature::GuardianApproval.info(); diff --git a/codex-rs/core/src/tools/code_mode/mod.rs b/codex-rs/core/src/tools/code_mode/mod.rs index ae362693e..fd79f9a7d 100644 --- a/codex-rs/core/src/tools/code_mode/mod.rs +++ b/codex-rs/core/src/tools/code_mode/mod.rs @@ -34,9 +34,14 @@ const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js"); const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md"); const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md"); const CODE_MODE_PRAGMA_PREFIX: &str = "// @exec:"; +const CODE_MODE_ONLY_PREFACE: &str = "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly"; pub(crate) const PUBLIC_TOOL_NAME: &str = "exec"; pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait"; + +pub(crate) fn is_code_mode_nested_tool(tool_name: &str) -> bool { + tool_name != PUBLIC_TOOL_NAME && tool_name != WAIT_TOOL_NAME +} pub(crate) const DEFAULT_EXEC_YIELD_TIME_MS: u64 = 10_000; pub(crate) const DEFAULT_WAIT_YIELD_TIME_MS: u64 = 10_000; @@ -62,16 +67,33 @@ enum CodeModeExecutionStatus { Terminated, } -pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String { - let enabled_list = if enabled_tool_names.is_empty() { - "none".to_string() - } else { - enabled_tool_names.join(", ") - }; - format!( - "{}\n- Enabled nested tools: {enabled_list}.", - CODE_MODE_DESCRIPTION_TEMPLATE.trim_end() - ) +pub(crate) fn tool_description(enabled_tools: &[(String, String)], code_mode_only: bool) -> String { + let description_template = CODE_MODE_DESCRIPTION_TEMPLATE.trim_end(); + if !code_mode_only { + return description_template.to_string(); + } + + let mut sections = vec![ + CODE_MODE_ONLY_PREFACE.to_string(), + description_template.to_string(), + ]; + + if !enabled_tools.is_empty() { + let nested_tool_reference = enabled_tools + .iter() + .map(|(name, nested_description)| { + let global_name = normalize_code_mode_identifier(name); + format!( + "### `{global_name}` (`{name}`)\n{}", + nested_description.trim() + ) + }) + .collect::>() + .join("\n\n"); + sections.push(nested_tool_reference); + } + + sections.join("\n\n") } pub(crate) fn wait_tool_description() -> &'static str { @@ -218,7 +240,7 @@ async fn build_enabled_tools(exec: &ExecContext) -> Vec { fn enabled_tool_from_spec(spec: ToolSpec) -> Option { let tool_name = spec.name().to_string(); - if tool_name == PUBLIC_TOOL_NAME || tool_name == WAIT_TOOL_NAME { + if !is_code_mode_nested_tool(&tool_name) { return None; } diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs index e211d83ce..b41c59ef9 100644 --- a/codex-rs/core/src/tools/router.rs +++ b/codex-rs/core/src/tools/router.rs @@ -4,6 +4,7 @@ use crate::codex::TurnContext; use crate::function_tool::FunctionCallError; use crate::mcp_connection_manager::ToolInfo; use crate::sandboxing::SandboxPermissions; +use crate::tools::code_mode::is_code_mode_nested_tool; use crate::tools::context::FunctionToolOutput; use crate::tools::context::SharedTurnDiffTracker; use crate::tools::context::ToolInvocation; @@ -39,6 +40,7 @@ pub struct ToolCall { pub struct ToolRouter { registry: ToolRegistry, specs: Vec, + model_visible_specs: Vec, } pub(crate) struct ToolRouterParams<'a> { @@ -64,8 +66,29 @@ impl ToolRouter { dynamic_tools, ); let (specs, registry) = builder.build(); + let model_visible_specs = if config.code_mode_only_enabled { + specs + .iter() + .filter_map(|configured_tool| { + if !is_code_mode_nested_tool(configured_tool.spec.name()) { + Some(configured_tool.spec.clone()) + } else { + None + } + }) + .collect() + } else { + specs + .iter() + .map(|configured_tool| configured_tool.spec.clone()) + .collect() + }; - Self { registry, specs } + Self { + registry, + specs, + model_visible_specs, + } } pub fn specs(&self) -> Vec { @@ -75,6 +98,10 @@ impl ToolRouter { .collect() } + pub fn model_visible_specs(&self) -> Vec { + self.model_visible_specs.clone() + } + pub fn find_spec(&self, tool_name: &str) -> Option { self.specs .iter() diff --git a/codex-rs/core/src/tools/spec.rs b/codex-rs/core/src/tools/spec.rs index a931940fe..17bab04bd 100644 --- a/codex-rs/core/src/tools/spec.rs +++ b/codex-rs/core/src/tools/spec.rs @@ -10,6 +10,7 @@ use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig; use crate::original_image_detail::can_request_original_image_detail; use crate::tools::code_mode::PUBLIC_TOOL_NAME; use crate::tools::code_mode::WAIT_TOOL_NAME; +use crate::tools::code_mode::is_code_mode_nested_tool; use crate::tools::code_mode::tool_description as code_mode_tool_description; use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description; use crate::tools::code_mode_description::augment_tool_spec_for_code_mode; @@ -226,6 +227,7 @@ pub(crate) struct ToolsConfig { pub exec_permission_approvals_enabled: bool, pub request_permissions_tool_enabled: bool, pub code_mode_enabled: bool, + pub code_mode_only_enabled: bool, pub js_repl_enabled: bool, pub js_repl_tools_only: bool, pub can_request_original_image_detail: bool, @@ -274,6 +276,7 @@ impl ToolsConfig { } = params; let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform); let include_code_mode = features.enabled(Feature::CodeMode); + let include_code_mode_only = include_code_mode && features.enabled(Feature::CodeModeOnly); let include_js_repl = features.enabled(Feature::JsRepl); let include_js_repl_tools_only = include_js_repl && features.enabled(Feature::JsReplToolsOnly); @@ -363,6 +366,7 @@ impl ToolsConfig { exec_permission_approvals_enabled, request_permissions_tool_enabled, code_mode_enabled: include_code_mode, + code_mode_only_enabled: include_code_mode_only, js_repl_enabled: include_js_repl, js_repl_tools_only: include_js_repl_tools_only, can_request_original_image_detail: include_original_image_detail, @@ -394,6 +398,7 @@ impl ToolsConfig { pub fn for_code_mode_nested_tools(&self) -> Self { let mut nested = self.clone(); nested.code_mode_enabled = false; + nested.code_mode_only_enabled = false; nested } } @@ -1995,7 +2000,10 @@ fn create_js_repl_reset_tool() -> ToolSpec { }) } -fn create_code_mode_tool(enabled_tool_names: &[String]) -> ToolSpec { +fn create_code_mode_tool( + enabled_tools: &[(String, String)], + code_mode_only_enabled: bool, +) -> ToolSpec { const CODE_MODE_FREEFORM_GRAMMAR: &str = r#" start: pragma_source | plain_source pragma_source: PRAGMA_LINE NEWLINE SOURCE @@ -2008,7 +2016,7 @@ SOURCE: /[\s\S]+/ ToolSpec::Freeform(FreeformTool { name: PUBLIC_TOOL_NAME.to_string(), - description: code_mode_tool_description(enabled_tool_names), + description: code_mode_tool_description(enabled_tools, code_mode_only_enabled), format: FreeformToolFormat { r#type: "grammar".to_string(), syntax: "lark".to_string(), @@ -2017,12 +2025,6 @@ SOURCE: /[\s\S]+/ }) } -fn is_code_mode_nested_tool(spec: &ToolSpec) -> bool { - spec.name() != PUBLIC_TOOL_NAME - && spec.name() != WAIT_TOOL_NAME - && matches!(spec, ToolSpec::Function(_) | ToolSpec::Freeform(_)) -} - fn create_list_mcp_resources_tool() -> ToolSpec { let properties = BTreeMap::from([ ( @@ -2475,17 +2477,22 @@ pub(crate) fn build_specs_with_discoverable_tools( dynamic_tools, ) .build(); - let mut enabled_tool_names = nested_specs + let mut enabled_tools = nested_specs .into_iter() - .map(|spec| spec.spec) - .filter(is_code_mode_nested_tool) - .map(|spec| spec.name().to_string()) + .filter_map(|spec| { + let (name, description) = match augment_tool_spec_for_code_mode(spec.spec, true) { + ToolSpec::Function(tool) => (tool.name, tool.description), + ToolSpec::Freeform(tool) => (tool.name, tool.description), + _ => return None, + }; + is_code_mode_nested_tool(&name).then_some((name, description)) + }) .collect::>(); - enabled_tool_names.sort(); - enabled_tool_names.dedup(); + enabled_tools.sort_by(|left, right| left.0.cmp(&right.0)); + enabled_tools.dedup_by(|left, right| left.0 == right.0); push_tool_spec( &mut builder, - create_code_mode_tool(&enabled_tool_names), + create_code_mode_tool(&enabled_tools, config.code_mode_only_enabled), false, config.code_mode_enabled, ); diff --git a/codex-rs/core/src/tools/spec_tests.rs b/codex-rs/core/src/tools/spec_tests.rs index c8dd7f21e..11be40855 100644 --- a/codex-rs/core/src/tools/spec_tests.rs +++ b/codex-rs/core/src/tools/spec_tests.rs @@ -2,7 +2,9 @@ use crate::client_common::tools::FreeformTool; use crate::config::test_config; use crate::models_manager::manager::ModelsManager; use crate::models_manager::model_info::with_config_overrides; +use crate::tools::ToolRouter; use crate::tools::registry::ConfiguredToolSpec; +use crate::tools::router::ToolRouterParams; use codex_app_server_protocol::AppInfo; use codex_protocol::openai_models::InputModality; use codex_protocol::openai_models::ModelInfo; @@ -933,8 +935,20 @@ fn assert_model_tools( sandbox_policy: &SandboxPolicy::DangerFullAccess, windows_sandbox_level: WindowsSandboxLevel::Disabled, }); - let (tools, _) = build_specs(&tools_config, None, None, &[]).build(); - let tool_names = tools.iter().map(|t| t.spec.name()).collect::>(); + let router = ToolRouter::from_config( + &tools_config, + ToolRouterParams { + mcp_tools: None, + app_tools: None, + discoverable_tools: None, + dynamic_tools: &[], + }, + ); + let model_visible_specs = router.model_visible_specs(); + let tool_names = model_visible_specs + .iter() + .map(ToolSpec::name) + .collect::>(); assert_eq!(&tool_names, &expected_tools,); } @@ -2488,6 +2502,83 @@ fn code_mode_augments_mcp_tool_descriptions_with_namespaced_sample() { ); } +#[test] +fn code_mode_only_restricts_model_tools_to_exec_tools() { + let mut features = Features::with_defaults(); + features.enable(Feature::CodeMode); + features.enable(Feature::CodeModeOnly); + + assert_model_tools( + "gpt-5.1-codex", + &features, + Some(WebSearchMode::Live), + &["exec", "exec_wait"], + ); +} + +#[test] +fn code_mode_only_exec_description_includes_full_nested_tool_details() { + let config = test_config(); + let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::CodeMode); + features.enable(Feature::CodeModeOnly); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + + let (tools, _) = build_specs(&tools_config, None, None, &[]).build(); + let ToolSpec::Freeform(FreeformTool { description, .. }) = &find_tool(&tools, "exec").spec + else { + panic!("expected freeform tool"); + }; + + assert!(!description.contains("Enabled nested tools:")); + assert!(!description.contains("Nested tool reference:")); + assert!(description.starts_with( + "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly" + )); + assert!(description.contains("### `update_plan` (`update_plan`)")); + assert!(description.contains("### `view_image` (`view_image`)")); +} + +#[test] +fn code_mode_exec_description_omits_nested_tool_details_when_not_code_mode_only() { + let config = test_config(); + let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config); + let mut features = Features::with_defaults(); + features.enable(Feature::CodeMode); + let available_models = Vec::new(); + let tools_config = ToolsConfig::new(&ToolsConfigParams { + model_info: &model_info, + available_models: &available_models, + features: &features, + web_search_mode: Some(WebSearchMode::Cached), + session_source: SessionSource::Cli, + sandbox_policy: &SandboxPolicy::DangerFullAccess, + windows_sandbox_level: WindowsSandboxLevel::Disabled, + }); + + let (tools, _) = build_specs(&tools_config, None, None, &[]).build(); + let ToolSpec::Freeform(FreeformTool { description, .. }) = &find_tool(&tools, "exec").spec + else { + panic!("expected freeform tool"); + }; + + assert!(!description.starts_with( + "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly" + )); + assert!(!description.contains("### `update_plan` (`update_plan`)")); + assert!(!description.contains("### `view_image` (`view_image`)")); +} + #[test] fn chat_tools_include_top_level_name() { let properties = diff --git a/codex-rs/core/tests/suite/code_mode.rs b/codex-rs/core/tests/suite/code_mode.rs index 6db519dd1..08867c514 100644 --- a/codex-rs/core/tests/suite/code_mode.rs +++ b/codex-rs/core/tests/suite/code_mode.rs @@ -37,6 +37,23 @@ fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec } } +fn tool_names(body: &Value) -> Vec { + body.get("tools") + .and_then(Value::as_array) + .map(|tools| { + tools + .iter() + .filter_map(|tool| { + tool.get("name") + .or_else(|| tool.get("type")) + .and_then(Value::as_str) + .map(str::to_string) + }) + .collect() + }) + .unwrap_or_default() +} + fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec { match req.function_call_output(call_id).get("output") { Some(Value::Array(items)) => items.clone(), @@ -233,6 +250,86 @@ text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marke Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_only_restricts_prompt_tools() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + let resp_mock = responses::mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-1"), + ]), + ) + .await; + + let mut builder = test_codex().with_config(|config| { + let _ = config.features.enable(Feature::CodeModeOnly); + }); + let test = builder.build(&server).await?; + test.submit_turn("list tools in code mode only").await?; + + let first_body = resp_mock.single_request().body_json(); + assert_eq!( + tool_names(&first_body), + vec!["exec".to_string(), "exec_wait".to_string()] + ); + + Ok(()) +} + +#[cfg_attr(windows, ignore = "no exec_command on Windows")] +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn code_mode_only_can_call_nested_tools() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = responses::start_mock_server().await; + responses::mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-1"), + ev_custom_tool_call( + "call-1", + "exec", + r#" +const output = await tools.exec_command({ cmd: "printf code_mode_only_nested_tool_marker" }); +text(output.output); +"#, + ), + ev_completed("resp-1"), + ]), + ) + .await; + let follow_up_mock = responses::mount_sse_once( + &server, + sse(vec![ + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ) + .await; + + let mut builder = test_codex().with_config(|config| { + let _ = config.features.enable(Feature::CodeModeOnly); + }); + let test = builder.build(&server).await?; + test.submit_turn("use exec to run nested tool in code mode only") + .await?; + + let request = follow_up_mock.single_request(); + let (output, success) = custom_tool_output_body_and_success(&request, "call-1"); + assert_ne!( + success, + Some(false), + "code_mode_only nested tool call failed unexpectedly: {output}" + ); + assert_eq!(output, "code_mode_only_nested_tool_marker"); + + Ok(()) +} + #[cfg_attr(windows, ignore = "flaky on windows")] #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {