From 53a486f7ea370dfc34a1b46214b7456d69e5ee3c Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Sun, 7 Dec 2025 09:47:48 -0800 Subject: [PATCH] Add remote models feature flag (#7648) This PR adds an experimental `remote_models` feature flag (key `remote_models`, default off). When the flag is enabled, the submission loop refreshes the list of available models from the `/models` endpoint at session start, logging an error (without failing the session) if the refresh fails. `ModelInfo.supported_reasoning_levels` now carries `ReasoningEffortPreset` entries (effort plus description) instead of bare `ReasoningEffort` values, and `ModelInfo` gains an optional `upgrade` slug that is converted into a `ModelUpgrade` whose reasoning-effort mapping maps each canonical effort to the nearest supported effort. Models whose visibility hides them from the picker are filtered out of the available-model presets, and a new integration test (`core/tests/suite/remote_models.rs`) verifies end to end that a remote model is fetched once from `/v1/models` and drives the unified exec shell. --- codex-rs/codex-api/src/endpoint/models.rs | 5 +- .../codex-api/tests/models_integration.rs | 17 +- codex-rs/core/src/codex.rs | 10 + codex-rs/core/src/features.rs | 8 + .../core/src/openai_models/model_family.rs | 7 +- .../core/src/openai_models/models_manager.rs | 22 ++- codex-rs/core/tests/suite/mod.rs | 1 + codex-rs/core/tests/suite/remote_models.rs | 183 ++++++++++++++++++ codex-rs/protocol/src/openai_models.rs | 64 ++++-- codex-rs/protocol/src/protocol.rs | 2 +- 10 files changed, 292 insertions(+), 27 deletions(-) create mode 100644 codex-rs/core/tests/suite/remote_models.rs diff --git a/codex-rs/codex-api/src/endpoint/models.rs b/codex-rs/codex-api/src/endpoint/models.rs index fec8d7f29..39f7b30c3 100644 --- a/codex-rs/codex-api/src/endpoint/models.rs +++ b/codex-rs/codex-api/src/endpoint/models.rs @@ -181,12 +181,13 @@ mod tests { "display_name": "gpt-test", "description": "desc", "default_reasoning_level": "medium", - "supported_reasoning_levels": ["low", "medium", "high"], + "supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}, {"effort": "high", "description": "high"}], "shell_type": "shell_command", "visibility": "list", "minimal_client_version": [0, 99, 0], "supported_in_api": true, - "priority": 1 + "priority": 1, + "upgrade": null, })) .unwrap(), ], diff --git 
a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index 3b4077f53..fff9c53f7 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -10,6 +10,7 @@ use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelVisibility; use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; use http::HeaderMap; use http::Method; use wiremock::Mock; @@ -57,15 +58,25 @@ async fn models_client_hits_models_endpoint() { description: Some("desc".to_string()), default_reasoning_level: ReasoningEffort::Medium, supported_reasoning_levels: vec![ - ReasoningEffort::Low, - ReasoningEffort::Medium, - ReasoningEffort::High, + ReasoningEffortPreset { + effort: ReasoningEffort::Low, + description: ReasoningEffort::Low.to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: ReasoningEffort::Medium.to_string(), + }, + ReasoningEffortPreset { + effort: ReasoningEffort::High, + description: ReasoningEffort::High.to_string(), + }, ], shell_type: ConfigShellToolType::ShellCommand, visibility: ModelVisibility::List, minimal_client_version: ClientVersion(0, 1, 0), supported_in_api: true, priority: 1, + upgrade: None, }], }; diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 89435ee6d..cc758eaed 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -1470,6 +1470,16 @@ async fn submission_loop(sess: Arc, config: Arc, rx_sub: Receiv let mut previous_context: Option> = Some(sess.new_turn(SessionSettingsUpdate::default()).await); + if config.features.enabled(Feature::RemoteModels) + && let Err(err) = sess + .services + .models_manager + .refresh_available_models(&config.model_provider) + .await + { + error!("failed to refresh available models: {err}"); + } + // To break out of this loop, 
send Op::Shutdown. while let Ok(sub) = rx_sub.recv().await { debug!(?sub, "Submission"); diff --git a/codex-rs/core/src/features.rs b/codex-rs/core/src/features.rs index 1d775360c..69442815e 100644 --- a/codex-rs/core/src/features.rs +++ b/codex-rs/core/src/features.rs @@ -54,6 +54,8 @@ pub enum Feature { WindowsSandbox, /// Remote compaction enabled (only for ChatGPT auth) RemoteCompaction, + /// Refresh remote models and emit AppReady once the list is available. + RemoteModels, /// Allow model to call multiple tools in parallel (only for models supporting it). ParallelToolCalls, /// Experimental skills injection (CLI flag-driven). @@ -333,6 +335,12 @@ pub const FEATURES: &[FeatureSpec] = &[ stage: Stage::Experimental, default_enabled: true, }, + FeatureSpec { + id: Feature::RemoteModels, + key: "remote_models", + stage: Stage::Experimental, + default_enabled: false, + }, FeatureSpec { id: Feature::ParallelToolCalls, key: "parallel", diff --git a/codex-rs/core/src/openai_models/model_family.rs b/codex-rs/core/src/openai_models/model_family.rs index 6ee18ad9e..507e1a48d 100644 --- a/codex-rs/core/src/openai_models/model_family.rs +++ b/codex-rs/core/src/openai_models/model_family.rs @@ -291,6 +291,7 @@ mod tests { use super::*; use codex_protocol::openai_models::ClientVersion; use codex_protocol::openai_models::ModelVisibility; + use codex_protocol::openai_models::ReasoningEffortPreset; fn remote(slug: &str, effort: ReasoningEffort, shell: ConfigShellToolType) -> ModelInfo { ModelInfo { @@ -298,12 +299,16 @@ mod tests { display_name: slug.to_string(), description: Some(format!("{slug} desc")), default_reasoning_level: effort, - supported_reasoning_levels: vec![effort], + supported_reasoning_levels: vec![ReasoningEffortPreset { + effort, + description: effort.to_string(), + }], shell_type: shell, visibility: ModelVisibility::List, minimal_client_version: ClientVersion(0, 1, 0), supported_in_api: true, priority: 1, + upgrade: None, } } diff --git 
a/codex-rs/core/src/openai_models/models_manager.rs b/codex-rs/core/src/openai_models/models_manager.rs index 22edf04ff..55c11f455 100644 --- a/codex-rs/core/src/openai_models/models_manager.rs +++ b/codex-rs/core/src/openai_models/models_manager.rs @@ -36,7 +36,6 @@ impl ModelsManager { } } - // do not use this function yet. It's work in progress. pub async fn refresh_available_models( &self, provider: &ModelProviderInfo, @@ -47,16 +46,21 @@ impl ModelsManager { let transport = ReqwestTransport::new(build_reqwest_client()); let client = ModelsClient::new(transport, api_provider, api_auth); + let mut client_version = env!("CARGO_PKG_VERSION"); + if client_version == "0.0.0" { + client_version = "99.99.99"; + } let response = client - .list_models(env!("CARGO_PKG_VERSION"), HeaderMap::new()) + .list_models(client_version, HeaderMap::new()) .await .map_err(map_api_error)?; let models = response.models; *self.remote_models.write().await = models.clone(); + let available_models = self.build_available_models().await; { let mut available_models_guard = self.available_models.write().await; - *available_models_guard = self.build_available_models().await; + *available_models_guard = available_models; } Ok(models) } @@ -75,8 +79,11 @@ impl ModelsManager { async fn build_available_models(&self) -> Vec { let mut available_models = self.remote_models.read().await.clone(); available_models.sort_by(|a, b| b.priority.cmp(&a.priority)); - let mut model_presets: Vec = - available_models.into_iter().map(Into::into).collect(); + let mut model_presets: Vec = available_models + .into_iter() + .map(Into::into) + .filter(|preset: &ModelPreset| preset.show_in_picker) + .collect(); if let Some(default) = model_presets.first_mut() { default.is_default = true; } @@ -103,12 +110,13 @@ mod tests { "display_name": display, "description": format!("{display} desc"), "default_reasoning_level": "medium", - "supported_reasoning_levels": ["low", "medium"], + "supported_reasoning_levels": [{"effort": 
"low", "description": "low"}, {"effort": "medium", "description": "medium"}], "shell_type": "shell_command", "visibility": "list", "minimal_client_version": [0, 1, 0], "supported_in_api": true, - "priority": priority + "priority": priority, + "upgrade": null, })) .expect("valid model") } diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index e2d78004a..2112cbb7a 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -41,6 +41,7 @@ mod otel; mod prompt_caching; mod quota_exceeded; mod read_file; +mod remote_models; mod resume; mod review; mod rmcp_client; diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs new file mode 100644 index 000000000..4178ed1c2 --- /dev/null +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -0,0 +1,183 @@ +#![cfg(not(target_os = "windows"))] +// unified exec is not supported on Windows OS +use std::sync::Arc; + +use anyhow::Result; +use codex_core::features::Feature; +use codex_core::openai_models::models_manager::ModelsManager; +use codex_core::protocol::AskForApproval; +use codex_core::protocol::EventMsg; +use codex_core::protocol::ExecCommandSource; +use codex_core::protocol::Op; +use codex_core::protocol::SandboxPolicy; +use codex_protocol::config_types::ReasoningSummary; +use codex_protocol::openai_models::ClientVersion; +use codex_protocol::openai_models::ConfigShellToolType; +use codex_protocol::openai_models::ModelInfo; +use codex_protocol::openai_models::ModelPreset; +use codex_protocol::openai_models::ModelVisibility; +use codex_protocol::openai_models::ModelsResponse; +use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::user_input::UserInput; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_function_call; +use 
core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_models_once; +use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::sse; +use core_test_support::skip_if_no_network; +use core_test_support::skip_if_sandbox; +use core_test_support::test_codex::TestCodex; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; +use core_test_support::wait_for_event_match; +use serde_json::json; +use tokio::time::Duration; +use tokio::time::Instant; +use tokio::time::sleep; +use wiremock::BodyPrintLimit; +use wiremock::MockServer; + +const REMOTE_MODEL_SLUG: &str = "codex-test"; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::builder() + .body_print_limit(BodyPrintLimit::Limited(80_000)) + .start() + .await; + + let remote_model = ModelInfo { + slug: REMOTE_MODEL_SLUG.to_string(), + display_name: "Remote Test".to_string(), + description: Some("A remote model that requires the test shell".to_string()), + default_reasoning_level: ReasoningEffort::Medium, + supported_reasoning_levels: vec![ReasoningEffortPreset { + effort: ReasoningEffort::Medium, + description: ReasoningEffort::Medium.to_string(), + }], + shell_type: ConfigShellToolType::UnifiedExec, + visibility: ModelVisibility::List, + minimal_client_version: ClientVersion(0, 1, 0), + supported_in_api: true, + priority: 1, + upgrade: None, + }; + + let models_mock = mount_models_once( + &server, + ModelsResponse { + models: vec![remote_model], + }, + ) + .await; + + let mut builder = test_codex().with_config(|config| { + config.features.enable(Feature::RemoteModels); + config.model = "gpt-5.1".to_string(); + }); + + let TestCodex { + codex, + cwd, + config, + conversation_manager, + .. 
+ } = builder.build(&server).await?; + + let models_manager = conversation_manager.get_models_manager(); + let available_model = wait_for_model_available(&models_manager, REMOTE_MODEL_SLUG).await; + + assert_eq!(available_model.model, REMOTE_MODEL_SLUG); + + let requests = models_mock.requests(); + assert_eq!( + requests.len(), + 1, + "expected a single /models refresh request for the remote models feature" + ); + assert_eq!(requests[0].url.path(), "/v1/models"); + + let family = models_manager + .construct_model_family(REMOTE_MODEL_SLUG, &config) + .await; + assert_eq!(family.shell_type, ConfigShellToolType::UnifiedExec); + + codex + .submit(Op::OverrideTurnContext { + cwd: None, + approval_policy: None, + sandbox_policy: None, + model: Some(REMOTE_MODEL_SLUG.to_string()), + effort: None, + summary: None, + }) + .await?; + + let call_id = "call"; + let args = json!({ + "cmd": "/bin/echo call", + "yield_time_ms": 250, + }); + let responses = vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-1", "done"), + ev_completed("resp-2"), + ]), + ]; + mount_sse_sequence(&server, responses).await; + + codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "run call".into(), + }], + final_output_json_schema: None, + cwd: cwd.path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::DangerFullAccess, + model: REMOTE_MODEL_SLUG.to_string(), + effort: None, + summary: ReasoningSummary::Auto, + }) + .await?; + + let begin_event = wait_for_event_match(&codex, |msg| match msg { + EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()), + _ => None, + }) + .await; + + assert_eq!(begin_event.source, ExecCommandSource::UnifiedExecStartup); + + wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await; 
+ + Ok(()) +} + +async fn wait_for_model_available(manager: &Arc, slug: &str) -> ModelPreset { + let deadline = Instant::now() + Duration::from_secs(2); + loop { + if let Some(model) = { + let guard = manager.available_models.read().await; + guard.iter().find(|model| model.model == slug).cloned() + } { + return model; + } + if Instant::now() >= deadline { + panic!("timed out waiting for the remote model {slug} to appear"); + } + sleep(Duration::from_millis(25)).await; + } +} diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 02d50627c..0804811a3 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -3,6 +3,7 @@ use std::collections::HashMap; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use strum::IntoEnumIterator; use strum_macros::Display; use strum_macros::EnumIter; use ts_rs::TS; @@ -36,7 +37,7 @@ pub enum ReasoningEffort { } /// A reasoning effort option that can be surfaced for a model. -#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)] +#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq, Eq)] pub struct ReasoningEffortPreset { /// Effort level that the model supports. pub effort: ReasoningEffort, @@ -123,7 +124,7 @@ pub struct ModelInfo { #[serde(default)] pub description: Option, pub default_reasoning_level: ReasoningEffort, - pub supported_reasoning_levels: Vec, + pub supported_reasoning_levels: Vec, pub shell_type: ConfigShellToolType, #[serde(default = "default_visibility")] pub visibility: ModelVisibility, @@ -132,6 +133,8 @@ pub struct ModelInfo { pub supported_in_api: bool, #[serde(default)] pub priority: i32, + #[serde(default)] + pub upgrade: Option, } /// Response wrapper for `/models`. 
@@ -149,22 +152,57 @@ impl From for ModelPreset { fn from(info: ModelInfo) -> Self { ModelPreset { id: info.slug.clone(), - model: info.slug, + model: info.slug.clone(), display_name: info.display_name, description: info.description.unwrap_or_default(), default_reasoning_effort: info.default_reasoning_level, - supported_reasoning_efforts: info - .supported_reasoning_levels - .into_iter() - .map(|level| ReasoningEffortPreset { - effort: level, - // todo: add description for each reasoning effort - description: level.to_string(), - }) - .collect(), + supported_reasoning_efforts: info.supported_reasoning_levels.clone(), is_default: false, // default is the highest priority available model - upgrade: None, // no upgrade available (todo: think about it) + upgrade: info.upgrade.as_ref().map(|upgrade_slug| ModelUpgrade { + id: upgrade_slug.clone(), + reasoning_effort_mapping: reasoning_effort_mapping_from_presets( + &info.supported_reasoning_levels, + ), + migration_config_key: info.slug.clone(), + }), show_in_picker: info.visibility == ModelVisibility::List, } } } + +fn reasoning_effort_mapping_from_presets( + presets: &[ReasoningEffortPreset], +) -> Option> { + if presets.is_empty() { + return None; + } + + // Map every canonical effort to the closest supported effort for the new model. 
+ let supported: Vec = presets.iter().map(|p| p.effort).collect(); + let mut map = HashMap::new(); + for effort in ReasoningEffort::iter() { + let nearest = nearest_effort(effort, &supported); + map.insert(effort, nearest); + } + Some(map) +} + +fn effort_rank(effort: ReasoningEffort) -> i32 { + match effort { + ReasoningEffort::None => 0, + ReasoningEffort::Minimal => 1, + ReasoningEffort::Low => 2, + ReasoningEffort::Medium => 3, + ReasoningEffort::High => 4, + ReasoningEffort::XHigh => 5, + } +} + +fn nearest_effort(target: ReasoningEffort, supported: &[ReasoningEffort]) -> ReasoningEffort { + let target_rank = effort_rank(target); + supported + .iter() + .copied() + .min_by_key(|candidate| (effort_rank(*candidate) - target_rank).abs()) + .unwrap_or(target) +} diff --git a/codex-rs/protocol/src/protocol.rs b/codex-rs/protocol/src/protocol.rs index 225a622dc..89b5fd315 100644 --- a/codex-rs/protocol/src/protocol.rs +++ b/codex-rs/protocol/src/protocol.rs @@ -1348,7 +1348,7 @@ pub struct ReviewLineRange { pub end: u32, } -#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] +#[derive(Debug, Clone, Copy, Display, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)] #[serde(rename_all = "snake_case")] pub enum ExecCommandSource { Agent,