Add code_mode_only feature (#14617)

Summary
- add the code_mode_only feature flag/config schema and wire its
dependency on code_mode
- update code mode tool descriptions to list nested tools with detailed
headers
- restrict available tools for prompt and exec descriptions when
code_mode_only is enabled and test the behavior

Testing
- Not run (not requested)
This commit is contained in:
pakrym-oai 2026-03-13 13:30:19 -07:00 committed by GitHub
parent ef37d313c6
commit 477a2dd345
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 302 additions and 31 deletions

View file

@ -339,6 +339,9 @@
"code_mode": {
"type": "boolean"
},
"code_mode_only": {
"type": "boolean"
},
"codex_git_commit": {
"type": "boolean"
},
@ -1880,6 +1883,9 @@
"code_mode": {
"type": "boolean"
},
"code_mode_only": {
"type": "boolean"
},
"codex_git_commit": {
"type": "boolean"
},

View file

@ -6175,7 +6175,7 @@ fn build_prompt(
) -> Prompt {
Prompt {
input,
tools: router.specs(),
tools: router.model_visible_specs(),
parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls,
base_instructions,
personality: turn_context.personality,

View file

@ -107,7 +107,7 @@ async fn run_remote_compact_task_inner_impl(
.await?;
let prompt = Prompt {
input: prompt_input,
tools: tool_router.specs(),
tools: tool_router.model_visible_specs(),
parallel_tool_calls: turn_context.model_info.supports_parallel_tool_calls,
base_instructions,
personality: turn_context.personality,

View file

@ -87,6 +87,8 @@ pub enum Feature {
JsRepl,
/// Enable a minimal JavaScript mode backed by Node's built-in vm runtime.
CodeMode,
/// Restrict model-visible tools to code mode entrypoints (`exec`, `exec_wait`).
CodeModeOnly,
/// Only expose js_repl tools directly to the model.
JsReplToolsOnly,
/// Use the single unified PTY-backed exec tool.
@ -429,6 +431,9 @@ impl Features {
if self.enabled(Feature::SpawnCsv) && !self.enabled(Feature::Collab) {
self.enable(Feature::Collab);
}
if self.enabled(Feature::CodeModeOnly) && !self.enabled(Feature::CodeMode) {
self.enable(Feature::CodeMode);
}
if self.enabled(Feature::JsReplToolsOnly) && !self.enabled(Feature::JsRepl) {
tracing::warn!("js_repl_tools_only requires js_repl; disabling js_repl_tools_only");
self.disable(Feature::JsReplToolsOnly);
@ -558,6 +563,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::CodeModeOnly,
key: "code_mode_only",
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::JsReplToolsOnly,
key: "js_repl_tools_only",

View file

@ -58,6 +58,16 @@ fn js_repl_is_experimental_and_user_toggleable() {
assert_eq!(Feature::JsRepl.default_enabled(), false);
}
/// Enabling `CodeModeOnly` alone must leave both it and `CodeMode` enabled
/// once feature dependencies have been normalized.
#[test]
fn code_mode_only_requires_code_mode() {
    let mut features = Features::with_defaults();
    features.enable(Feature::CodeModeOnly);
    features.normalize_dependencies();

    // The requested flag first, then the dependency it pulls in.
    for feature in [Feature::CodeModeOnly, Feature::CodeMode] {
        assert_eq!(features.enabled(feature), true);
    }
}
#[test]
fn guardian_approval_is_experimental_and_user_toggleable() {
let spec = Feature::GuardianApproval.info();

View file

@ -34,9 +34,14 @@ const CODE_MODE_BRIDGE_SOURCE: &str = include_str!("bridge.js");
/// Text of `description.md`, embedded at compile time; `tool_description`
/// uses it (with trailing whitespace trimmed) as the base `exec` description.
const CODE_MODE_DESCRIPTION_TEMPLATE: &str = include_str!("description.md");
/// Text of `wait_description.md`, embedded at compile time.
/// NOTE(review): presumably the `exec_wait` tool description — its use is not visible here.
const CODE_MODE_WAIT_DESCRIPTION_TEMPLATE: &str = include_str!("wait_description.md");
/// Literal prefix `// @exec:`.
/// NOTE(review): looks like the marker for a leading pragma line in `exec` source — confirm against the parser.
const CODE_MODE_PRAGMA_PREFIX: &str = "// @exec:";
/// Instruction placed at the very start of the `exec` tool description when
/// code-mode-only is enabled.
const CODE_MODE_ONLY_PREFACE: &str = "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly";
/// Name of the code-mode entrypoint tool exposed to the model.
pub(crate) const PUBLIC_TOOL_NAME: &str = "exec";
/// Name of the companion code-mode wait tool.
pub(crate) const WAIT_TOOL_NAME: &str = "exec_wait";

/// Returns `true` for every tool name other than the two code-mode
/// entrypoints ([`PUBLIC_TOOL_NAME`] and [`WAIT_TOOL_NAME`]).
///
/// The comparison is exact and case-sensitive.
pub(crate) fn is_code_mode_nested_tool(tool_name: &str) -> bool {
    !matches!(tool_name, PUBLIC_TOOL_NAME | WAIT_TOOL_NAME)
}
pub(crate) const DEFAULT_EXEC_YIELD_TIME_MS: u64 = 10_000;
pub(crate) const DEFAULT_WAIT_YIELD_TIME_MS: u64 = 10_000;
@ -62,16 +67,33 @@ enum CodeModeExecutionStatus {
Terminated,
}
pub(crate) fn tool_description(enabled_tool_names: &[String]) -> String {
let enabled_list = if enabled_tool_names.is_empty() {
"none".to_string()
} else {
enabled_tool_names.join(", ")
};
format!(
"{}\n- Enabled nested tools: {enabled_list}.",
CODE_MODE_DESCRIPTION_TEMPLATE.trim_end()
)
/// Builds the description text for the `exec` tool.
///
/// * `enabled_tools` — `(name, description)` pairs for the nested tools.
/// * `code_mode_only` — when `false`, the trimmed description template is
///   returned unchanged and `enabled_tools` is ignored.
///
/// When `code_mode_only` is `true` the result is, separated by blank lines:
/// the code-mode-only preface, the trimmed template, and then one
/// `### \`<normalized name>\` (\`<name>\`)` section per nested tool followed
/// by that tool's trimmed description.
pub(crate) fn tool_description(enabled_tools: &[(String, String)], code_mode_only: bool) -> String {
    let base = CODE_MODE_DESCRIPTION_TEMPLATE.trim_end();
    if !code_mode_only {
        return base.to_string();
    }

    let mut description = format!("{CODE_MODE_ONLY_PREFACE}\n\n{base}");
    for (name, nested_description) in enabled_tools {
        let global_name = normalize_code_mode_identifier(name);
        // Each nested tool becomes its own blank-line-separated section.
        description.push_str(&format!(
            "\n\n### `{global_name}` (`{name}`)\n{}",
            nested_description.trim()
        ));
    }
    description
}
pub(crate) fn wait_tool_description() -> &'static str {
@ -218,7 +240,7 @@ async fn build_enabled_tools(exec: &ExecContext) -> Vec<protocol::EnabledTool> {
fn enabled_tool_from_spec(spec: ToolSpec) -> Option<protocol::EnabledTool> {
let tool_name = spec.name().to_string();
if tool_name == PUBLIC_TOOL_NAME || tool_name == WAIT_TOOL_NAME {
if !is_code_mode_nested_tool(&tool_name) {
return None;
}

View file

@ -4,6 +4,7 @@ use crate::codex::TurnContext;
use crate::function_tool::FunctionCallError;
use crate::mcp_connection_manager::ToolInfo;
use crate::sandboxing::SandboxPermissions;
use crate::tools::code_mode::is_code_mode_nested_tool;
use crate::tools::context::FunctionToolOutput;
use crate::tools::context::SharedTurnDiffTracker;
use crate::tools::context::ToolInvocation;
@ -39,6 +40,7 @@ pub struct ToolCall {
/// Holds the tool registry together with the tool specs produced by the
/// tool builder.
pub struct ToolRouter {
/// Registry returned by the builder alongside `specs`.
registry: ToolRegistry,
/// Every configured tool spec returned by the builder.
specs: Vec<ConfiguredToolSpec>,
/// Specs returned by `model_visible_specs()`. When `code_mode_only_enabled`
/// is set in the config, only specs named `exec` / `exec_wait` are kept;
/// otherwise this is a clone of every entry in `specs`.
model_visible_specs: Vec<ToolSpec>,
}
pub(crate) struct ToolRouterParams<'a> {
@ -64,8 +66,29 @@ impl ToolRouter {
dynamic_tools,
);
let (specs, registry) = builder.build();
let model_visible_specs = if config.code_mode_only_enabled {
specs
.iter()
.filter_map(|configured_tool| {
if !is_code_mode_nested_tool(configured_tool.spec.name()) {
Some(configured_tool.spec.clone())
} else {
None
}
})
.collect()
} else {
specs
.iter()
.map(|configured_tool| configured_tool.spec.clone())
.collect()
};
Self { registry, specs }
Self {
registry,
specs,
model_visible_specs,
}
}
pub fn specs(&self) -> Vec<ToolSpec> {
@ -75,6 +98,10 @@ impl ToolRouter {
.collect()
}
/// Returns an owned copy of the specs stored in `model_visible_specs`.
pub fn model_visible_specs(&self) -> Vec<ToolSpec> {
    self.model_visible_specs.iter().cloned().collect()
}
pub fn find_spec(&self, tool_name: &str) -> Option<ToolSpec> {
self.specs
.iter()

View file

@ -10,6 +10,7 @@ use crate::models_manager::collaboration_mode_presets::CollaborationModesConfig;
use crate::original_image_detail::can_request_original_image_detail;
use crate::tools::code_mode::PUBLIC_TOOL_NAME;
use crate::tools::code_mode::WAIT_TOOL_NAME;
use crate::tools::code_mode::is_code_mode_nested_tool;
use crate::tools::code_mode::tool_description as code_mode_tool_description;
use crate::tools::code_mode::wait_tool_description as code_mode_wait_tool_description;
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
@ -226,6 +227,7 @@ pub(crate) struct ToolsConfig {
pub exec_permission_approvals_enabled: bool,
pub request_permissions_tool_enabled: bool,
pub code_mode_enabled: bool,
pub code_mode_only_enabled: bool,
pub js_repl_enabled: bool,
pub js_repl_tools_only: bool,
pub can_request_original_image_detail: bool,
@ -274,6 +276,7 @@ impl ToolsConfig {
} = params;
let include_apply_patch_tool = features.enabled(Feature::ApplyPatchFreeform);
let include_code_mode = features.enabled(Feature::CodeMode);
let include_code_mode_only = include_code_mode && features.enabled(Feature::CodeModeOnly);
let include_js_repl = features.enabled(Feature::JsRepl);
let include_js_repl_tools_only =
include_js_repl && features.enabled(Feature::JsReplToolsOnly);
@ -363,6 +366,7 @@ impl ToolsConfig {
exec_permission_approvals_enabled,
request_permissions_tool_enabled,
code_mode_enabled: include_code_mode,
code_mode_only_enabled: include_code_mode_only,
js_repl_enabled: include_js_repl,
js_repl_tools_only: include_js_repl_tools_only,
can_request_original_image_detail: include_original_image_detail,
@ -394,6 +398,7 @@ impl ToolsConfig {
pub fn for_code_mode_nested_tools(&self) -> Self {
let mut nested = self.clone();
nested.code_mode_enabled = false;
nested.code_mode_only_enabled = false;
nested
}
}
@ -1995,7 +2000,10 @@ fn create_js_repl_reset_tool() -> ToolSpec {
})
}
fn create_code_mode_tool(enabled_tool_names: &[String]) -> ToolSpec {
fn create_code_mode_tool(
enabled_tools: &[(String, String)],
code_mode_only_enabled: bool,
) -> ToolSpec {
const CODE_MODE_FREEFORM_GRAMMAR: &str = r#"
start: pragma_source | plain_source
pragma_source: PRAGMA_LINE NEWLINE SOURCE
@ -2008,7 +2016,7 @@ SOURCE: /[\s\S]+/
ToolSpec::Freeform(FreeformTool {
name: PUBLIC_TOOL_NAME.to_string(),
description: code_mode_tool_description(enabled_tool_names),
description: code_mode_tool_description(enabled_tools, code_mode_only_enabled),
format: FreeformToolFormat {
r#type: "grammar".to_string(),
syntax: "lark".to_string(),
@ -2017,12 +2025,6 @@ SOURCE: /[\s\S]+/
})
}
/// Returns `true` when `spec` is a function or freeform tool whose name is
/// neither `PUBLIC_TOOL_NAME` nor `WAIT_TOOL_NAME`.
fn is_code_mode_nested_tool(spec: &ToolSpec) -> bool {
    match spec {
        ToolSpec::Function(_) | ToolSpec::Freeform(_) => {
            let name = spec.name();
            name != PUBLIC_TOOL_NAME && name != WAIT_TOOL_NAME
        }
        // Any other spec kind is never treated as a nested tool.
        _ => false,
    }
}
fn create_list_mcp_resources_tool() -> ToolSpec {
let properties = BTreeMap::from([
(
@ -2475,17 +2477,22 @@ pub(crate) fn build_specs_with_discoverable_tools(
dynamic_tools,
)
.build();
let mut enabled_tool_names = nested_specs
let mut enabled_tools = nested_specs
.into_iter()
.map(|spec| spec.spec)
.filter(is_code_mode_nested_tool)
.map(|spec| spec.name().to_string())
.filter_map(|spec| {
let (name, description) = match augment_tool_spec_for_code_mode(spec.spec, true) {
ToolSpec::Function(tool) => (tool.name, tool.description),
ToolSpec::Freeform(tool) => (tool.name, tool.description),
_ => return None,
};
is_code_mode_nested_tool(&name).then_some((name, description))
})
.collect::<Vec<_>>();
enabled_tool_names.sort();
enabled_tool_names.dedup();
enabled_tools.sort_by(|left, right| left.0.cmp(&right.0));
enabled_tools.dedup_by(|left, right| left.0 == right.0);
push_tool_spec(
&mut builder,
create_code_mode_tool(&enabled_tool_names),
create_code_mode_tool(&enabled_tools, config.code_mode_only_enabled),
false,
config.code_mode_enabled,
);

View file

@ -2,7 +2,9 @@ use crate::client_common::tools::FreeformTool;
use crate::config::test_config;
use crate::models_manager::manager::ModelsManager;
use crate::models_manager::model_info::with_config_overrides;
use crate::tools::ToolRouter;
use crate::tools::registry::ConfiguredToolSpec;
use crate::tools::router::ToolRouterParams;
use codex_app_server_protocol::AppInfo;
use codex_protocol::openai_models::InputModality;
use codex_protocol::openai_models::ModelInfo;
@ -933,8 +935,20 @@ fn assert_model_tools(
sandbox_policy: &SandboxPolicy::DangerFullAccess,
windows_sandbox_level: WindowsSandboxLevel::Disabled,
});
let (tools, _) = build_specs(&tools_config, None, None, &[]).build();
let tool_names = tools.iter().map(|t| t.spec.name()).collect::<Vec<_>>();
let router = ToolRouter::from_config(
&tools_config,
ToolRouterParams {
mcp_tools: None,
app_tools: None,
discoverable_tools: None,
dynamic_tools: &[],
},
);
let model_visible_specs = router.model_visible_specs();
let tool_names = model_visible_specs
.iter()
.map(ToolSpec::name)
.collect::<Vec<_>>();
assert_eq!(&tool_names, &expected_tools,);
}
@ -2488,6 +2502,83 @@ fn code_mode_augments_mcp_tool_descriptions_with_namespaced_sample() {
);
}
/// With `CodeMode` and `CodeModeOnly` both enabled, the model-visible tool
/// list must be exactly `exec` followed by `exec_wait`.
#[test]
fn code_mode_only_restricts_model_tools_to_exec_tools() {
    let features = {
        let mut enabled = Features::with_defaults();
        enabled.enable(Feature::CodeMode);
        enabled.enable(Feature::CodeModeOnly);
        enabled
    };
    let expected_tools = ["exec", "exec_wait"];

    assert_model_tools(
        "gpt-5.1-codex",
        &features,
        Some(WebSearchMode::Live),
        &expected_tools,
    );
}
/// In code-mode-only configuration the `exec` description must start with the
/// code-mode-only preface and carry a `###` section for nested tools such as
/// `update_plan` and `view_image`, without the old summary-list labels.
#[test]
fn code_mode_only_exec_description_includes_full_nested_tool_details() {
    const PREFACE: &str = "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly";

    let config = test_config();
    let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);

    let mut features = Features::with_defaults();
    features.enable(Feature::CodeMode);
    features.enable(Feature::CodeModeOnly);

    let available_models = Vec::new();
    let tools_config = ToolsConfig::new(&ToolsConfigParams {
        model_info: &model_info,
        available_models: &available_models,
        features: &features,
        web_search_mode: Some(WebSearchMode::Cached),
        session_source: SessionSource::Cli,
        sandbox_policy: &SandboxPolicy::DangerFullAccess,
        windows_sandbox_level: WindowsSandboxLevel::Disabled,
    });
    let (tools, _) = build_specs(&tools_config, None, None, &[]).build();

    let ToolSpec::Freeform(FreeformTool { description, .. }) = &find_tool(&tools, "exec").spec
    else {
        panic!("expected freeform tool");
    };

    // Labels that must not appear in the description.
    for absent_label in ["Enabled nested tools:", "Nested tool reference:"] {
        assert!(!description.contains(absent_label));
    }
    assert!(description.starts_with(PREFACE));
    // Per-tool headings that must be present.
    for heading in [
        "### `update_plan` (`update_plan`)",
        "### `view_image` (`view_image`)",
    ] {
        assert!(description.contains(heading));
    }
}
/// With only `CodeMode` enabled, the `exec` description must neither start
/// with the code-mode-only preface nor contain per-tool `###` sections.
#[test]
fn code_mode_exec_description_omits_nested_tool_details_when_not_code_mode_only() {
    const PREFACE: &str = "Use `exec/exec_wait` tool to run all other tools, do not attempt to use any other tools directly";

    let config = test_config();
    let model_info = ModelsManager::construct_model_info_offline_for_tests("gpt-5-codex", &config);

    let mut features = Features::with_defaults();
    features.enable(Feature::CodeMode);

    let available_models = Vec::new();
    let tools_config = ToolsConfig::new(&ToolsConfigParams {
        model_info: &model_info,
        available_models: &available_models,
        features: &features,
        web_search_mode: Some(WebSearchMode::Cached),
        session_source: SessionSource::Cli,
        sandbox_policy: &SandboxPolicy::DangerFullAccess,
        windows_sandbox_level: WindowsSandboxLevel::Disabled,
    });
    let (tools, _) = build_specs(&tools_config, None, None, &[]).build();

    let ToolSpec::Freeform(FreeformTool { description, .. }) = &find_tool(&tools, "exec").spec
    else {
        panic!("expected freeform tool");
    };

    assert!(!description.starts_with(PREFACE));
    for heading in [
        "### `update_plan` (`update_plan`)",
        "### `view_image` (`view_image`)",
    ] {
        assert!(!description.contains(heading));
    }
}
#[test]
fn chat_tools_include_top_level_name() {
let properties =

View file

@ -37,6 +37,23 @@ fn custom_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value>
}
}
/// Collects one label per entry of the request body's `tools` array.
///
/// For each tool the `name` field is used when the key is present, otherwise
/// the `type` field; entries whose chosen field is missing or not a string
/// are skipped. Returns an empty vector when `tools` is absent or not an
/// array.
fn tool_names(body: &Value) -> Vec<String> {
    let Some(tools) = body.get("tools").and_then(Value::as_array) else {
        return Vec::new();
    };

    tools
        .iter()
        .filter_map(|tool| {
            let label = tool.get("name").or_else(|| tool.get("type"))?;
            label.as_str().map(str::to_string)
        })
        .collect()
}
fn function_tool_output_items(req: &ResponsesRequest, call_id: &str) -> Vec<Value> {
match req.function_call_output(call_id).get("output") {
Some(Value::Array(items)) => items.clone(),
@ -233,6 +250,86 @@ text(JSON.stringify(await tools.exec_command({ cmd: "printf code_mode_exec_marke
Ok(())
}
/// With only `CodeModeOnly` enabled in the config, the tools sent in the
/// single captured request must be exactly `exec` and `exec_wait`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_only_restricts_prompt_tools() -> Result<()> {
    skip_if_no_network!(Ok(()));

    let server = responses::start_mock_server().await;
    let completed_turn = sse(vec![
        ev_response_created("resp-1"),
        ev_assistant_message("msg-1", "done"),
        ev_completed("resp-1"),
    ]);
    let request_log = responses::mount_sse_once(&server, completed_turn).await;

    let mut builder = test_codex().with_config(|config| {
        let _ = config.features.enable(Feature::CodeModeOnly);
    });
    let test = builder.build(&server).await?;
    test.submit_turn("list tools in code mode only").await?;

    let request_body = request_log.single_request().body_json();
    let expected_tools = vec!["exec".to_string(), "exec_wait".to_string()];
    assert_eq!(tool_names(&request_body), expected_tools);

    Ok(())
}
/// With only `CodeModeOnly` enabled in the config, a script run through the
/// `exec` tool can still call a nested tool (`tools.exec_command`), and its
/// output is what gets reported back for the tool call.
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_only_can_call_nested_tools() -> Result<()> {
skip_if_no_network!(Ok(()));
let server = responses::start_mock_server().await;
// First mocked response: a custom `exec` tool call whose script runs
// `exec_command` and emits the command output via `text(...)`.
responses::mount_sse_once(
&server,
sse(vec![
ev_response_created("resp-1"),
ev_custom_tool_call(
"call-1",
"exec",
r#"
const output = await tools.exec_command({ cmd: "printf code_mode_only_nested_tool_marker" });
text(output.output);
"#,
),
ev_completed("resp-1"),
]),
)
.await;
// Second mocked response; the request captured by this mock is inspected below.
// NOTE(review): presumably the mocks are served in mount order — confirm in the test helpers.
let follow_up_mock = responses::mount_sse_once(
&server,
sse(vec![
ev_assistant_message("msg-1", "done"),
ev_completed("resp-2"),
]),
)
.await;
let mut builder = test_codex().with_config(|config| {
let _ = config.features.enable(Feature::CodeModeOnly);
});
let test = builder.build(&server).await?;
test.submit_turn("use exec to run nested tool in code mode only")
.await?;
// The follow-up request should carry the output recorded for `call-1`.
let request = follow_up_mock.single_request();
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
// Only an explicit `Some(false)` counts as failure here.
assert_ne!(
success,
Some(false),
"code_mode_only nested tool call failed unexpectedly: {output}"
);
assert_eq!(output, "code_mode_only_nested_tool_marker");
Ok(())
}
#[cfg_attr(windows, ignore = "flaky on windows")]
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn code_mode_nested_tool_calls_can_run_in_parallel() -> Result<()> {