Add remote models feature flag (#7648)

# External (non-OpenAI) Pull Request Requirements

Before opening this Pull Request, please read the dedicated
"Contributing" markdown file or your PR may be closed:
https://github.com/openai/codex/blob/main/docs/contributing.md

If your PR conforms to our contribution guidelines, replace this text
with a detailed and high quality description of your changes.

Include a link to a bug report or enhancement request.
This commit is contained in:
Ahmed Ibrahim 2025-12-07 09:47:48 -08:00 committed by GitHub
parent 3c3d3d1adc
commit 53a486f7ea
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 292 additions and 27 deletions

View file

@ -181,12 +181,13 @@ mod tests {
"display_name": "gpt-test",
"description": "desc",
"default_reasoning_level": "medium",
"supported_reasoning_levels": ["low", "medium", "high"],
"supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}, {"effort": "high", "description": "high"}],
"shell_type": "shell_command",
"visibility": "list",
"minimal_client_version": [0, 99, 0],
"supported_in_api": true,
"priority": 1
"priority": 1,
"upgrade": null,
}))
.unwrap(),
],

View file

@ -10,6 +10,7 @@ use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use http::HeaderMap;
use http::Method;
use wiremock::Mock;
@ -57,15 +58,25 @@ async fn models_client_hits_models_endpoint() {
description: Some("desc".to_string()),
default_reasoning_level: ReasoningEffort::Medium,
supported_reasoning_levels: vec![
ReasoningEffort::Low,
ReasoningEffort::Medium,
ReasoningEffort::High,
ReasoningEffortPreset {
effort: ReasoningEffort::Low,
description: ReasoningEffort::Low.to_string(),
},
ReasoningEffortPreset {
effort: ReasoningEffort::Medium,
description: ReasoningEffort::Medium.to_string(),
},
ReasoningEffortPreset {
effort: ReasoningEffort::High,
description: ReasoningEffort::High.to_string(),
},
],
shell_type: ConfigShellToolType::ShellCommand,
visibility: ModelVisibility::List,
minimal_client_version: ClientVersion(0, 1, 0),
supported_in_api: true,
priority: 1,
upgrade: None,
}],
};

View file

@ -1470,6 +1470,16 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
let mut previous_context: Option<Arc<TurnContext>> =
Some(sess.new_turn(SessionSettingsUpdate::default()).await);
if config.features.enabled(Feature::RemoteModels)
&& let Err(err) = sess
.services
.models_manager
.refresh_available_models(&config.model_provider)
.await
{
error!("failed to refresh available models: {err}");
}
// To break out of this loop, send Op::Shutdown.
while let Ok(sub) = rx_sub.recv().await {
debug!(?sub, "Submission");

View file

@ -54,6 +54,8 @@ pub enum Feature {
WindowsSandbox,
/// Remote compaction enabled (only for ChatGPT auth)
RemoteCompaction,
/// Refresh remote models and emit AppReady once the list is available.
RemoteModels,
/// Allow model to call multiple tools in parallel (only for models supporting it).
ParallelToolCalls,
/// Experimental skills injection (CLI flag-driven).
@ -333,6 +335,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Experimental,
default_enabled: true,
},
FeatureSpec {
id: Feature::RemoteModels,
key: "remote_models",
stage: Stage::Experimental,
default_enabled: false,
},
FeatureSpec {
id: Feature::ParallelToolCalls,
key: "parallel",

View file

@ -291,6 +291,7 @@ mod tests {
use super::*;
use codex_protocol::openai_models::ClientVersion;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ReasoningEffortPreset;
fn remote(slug: &str, effort: ReasoningEffort, shell: ConfigShellToolType) -> ModelInfo {
ModelInfo {
@ -298,12 +299,16 @@ mod tests {
display_name: slug.to_string(),
description: Some(format!("{slug} desc")),
default_reasoning_level: effort,
supported_reasoning_levels: vec![effort],
supported_reasoning_levels: vec![ReasoningEffortPreset {
effort,
description: effort.to_string(),
}],
shell_type: shell,
visibility: ModelVisibility::List,
minimal_client_version: ClientVersion(0, 1, 0),
supported_in_api: true,
priority: 1,
upgrade: None,
}
}

View file

@ -36,7 +36,6 @@ impl ModelsManager {
}
}
// do not use this function yet. It's work in progress.
pub async fn refresh_available_models(
&self,
provider: &ModelProviderInfo,
@ -47,16 +46,21 @@ impl ModelsManager {
let transport = ReqwestTransport::new(build_reqwest_client());
let client = ModelsClient::new(transport, api_provider, api_auth);
let mut client_version = env!("CARGO_PKG_VERSION");
if client_version == "0.0.0" {
client_version = "99.99.99";
}
let response = client
.list_models(env!("CARGO_PKG_VERSION"), HeaderMap::new())
.list_models(client_version, HeaderMap::new())
.await
.map_err(map_api_error)?;
let models = response.models;
*self.remote_models.write().await = models.clone();
let available_models = self.build_available_models().await;
{
let mut available_models_guard = self.available_models.write().await;
*available_models_guard = self.build_available_models().await;
*available_models_guard = available_models;
}
Ok(models)
}
@ -75,8 +79,11 @@ impl ModelsManager {
async fn build_available_models(&self) -> Vec<ModelPreset> {
let mut available_models = self.remote_models.read().await.clone();
available_models.sort_by(|a, b| b.priority.cmp(&a.priority));
let mut model_presets: Vec<ModelPreset> =
available_models.into_iter().map(Into::into).collect();
let mut model_presets: Vec<ModelPreset> = available_models
.into_iter()
.map(Into::into)
.filter(|preset: &ModelPreset| preset.show_in_picker)
.collect();
if let Some(default) = model_presets.first_mut() {
default.is_default = true;
}
@ -103,12 +110,13 @@ mod tests {
"display_name": display,
"description": format!("{display} desc"),
"default_reasoning_level": "medium",
"supported_reasoning_levels": ["low", "medium"],
"supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}],
"shell_type": "shell_command",
"visibility": "list",
"minimal_client_version": [0, 1, 0],
"supported_in_api": true,
"priority": priority
"priority": priority,
"upgrade": null,
}))
.expect("valid model")
}

View file

@ -41,6 +41,7 @@ mod otel;
mod prompt_caching;
mod quota_exceeded;
mod read_file;
mod remote_models;
mod resume;
mod review;
mod rmcp_client;

View file

@ -0,0 +1,183 @@
#![cfg(not(target_os = "windows"))]
// unified exec is not supported on Windows OS
use std::sync::Arc;
use anyhow::Result;
use codex_core::features::Feature;
use codex_core::openai_models::models_manager::ModelsManager;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::ExecCommandSource;
use codex_core::protocol::Op;
use codex_core::protocol::SandboxPolicy;
use codex_protocol::config_types::ReasoningSummary;
use codex_protocol::openai_models::ClientVersion;
use codex_protocol::openai_models::ConfigShellToolType;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelPreset;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ModelsResponse;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use codex_protocol::user_input::UserInput;
use core_test_support::responses::ev_assistant_message;
use core_test_support::responses::ev_completed;
use core_test_support::responses::ev_function_call;
use core_test_support::responses::ev_response_created;
use core_test_support::responses::mount_models_once;
use core_test_support::responses::mount_sse_sequence;
use core_test_support::responses::sse;
use core_test_support::skip_if_no_network;
use core_test_support::skip_if_sandbox;
use core_test_support::test_codex::TestCodex;
use core_test_support::test_codex::test_codex;
use core_test_support::wait_for_event;
use core_test_support::wait_for_event_match;
use serde_json::json;
use tokio::time::Duration;
use tokio::time::Instant;
use tokio::time::sleep;
use wiremock::BodyPrintLimit;
use wiremock::MockServer;
const REMOTE_MODEL_SLUG: &str = "codex-test";
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
// End-to-end check that enabling `Feature::RemoteModels` makes the session
// fetch the remote model list once at startup, and that a remote model whose
// `shell_type` is `UnifiedExec` actually routes tool calls through unified exec.
async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
    // These macros early-return Ok(()) in environments where the test cannot run.
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));
    // Mock server backing both the /models endpoint and the SSE response stream.
    let server = MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
        .start()
        .await;
    // A single remote model advertising the unified-exec shell.
    let remote_model = ModelInfo {
        slug: REMOTE_MODEL_SLUG.to_string(),
        display_name: "Remote Test".to_string(),
        description: Some("A remote model that requires the test shell".to_string()),
        default_reasoning_level: ReasoningEffort::Medium,
        supported_reasoning_levels: vec![ReasoningEffortPreset {
            effort: ReasoningEffort::Medium,
            description: ReasoningEffort::Medium.to_string(),
        }],
        shell_type: ConfigShellToolType::UnifiedExec,
        visibility: ModelVisibility::List,
        minimal_client_version: ClientVersion(0, 1, 0),
        supported_in_api: true,
        priority: 1,
        upgrade: None,
    };
    // Mounted once: the test later asserts exactly one refresh request was made.
    let models_mock = mount_models_once(
        &server,
        ModelsResponse {
            models: vec![remote_model],
        },
    )
    .await;
    let mut builder = test_codex().with_config(|config| {
        // The feature flag under test; the configured model starts as a non-remote one.
        config.features.enable(Feature::RemoteModels);
        config.model = "gpt-5.1".to_string();
    });
    let TestCodex {
        codex,
        cwd,
        config,
        conversation_manager,
        ..
    } = builder.build(&server).await?;
    // The refresh happens asynchronously at session start, so poll until the
    // remote model shows up in the manager's available list.
    let models_manager = conversation_manager.get_models_manager();
    let available_model = wait_for_model_available(&models_manager, REMOTE_MODEL_SLUG).await;
    assert_eq!(available_model.model, REMOTE_MODEL_SLUG);
    let requests = models_mock.requests();
    assert_eq!(
        requests.len(),
        1,
        "expected a single /models refresh request for the remote models feature"
    );
    assert_eq!(requests[0].url.path(), "/v1/models");
    // The remotely advertised shell type must survive model-family construction.
    let family = models_manager
        .construct_model_family(REMOTE_MODEL_SLUG, &config)
        .await;
    assert_eq!(family.shell_type, ConfigShellToolType::UnifiedExec);
    // Switch the active turn context over to the remote model.
    codex
        .submit(Op::OverrideTurnContext {
            cwd: None,
            approval_policy: None,
            sandbox_policy: None,
            model: Some(REMOTE_MODEL_SLUG.to_string()),
            effort: None,
            summary: None,
        })
        .await?;
    let call_id = "call";
    let args = json!({
        "cmd": "/bin/echo call",
        "yield_time_ms": 250,
    });
    // First SSE turn issues an exec_command tool call; second completes the task.
    let responses = vec![
        sse(vec![
            ev_response_created("resp-1"),
            ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
            ev_completed("resp-1"),
        ]),
        sse(vec![
            ev_response_created("resp-2"),
            ev_assistant_message("msg-1", "done"),
            ev_completed("resp-2"),
        ]),
    ];
    mount_sse_sequence(&server, responses).await;
    codex
        .submit(Op::UserTurn {
            items: vec![UserInput::Text {
                text: "run call".into(),
            }],
            final_output_json_schema: None,
            cwd: cwd.path().to_path_buf(),
            approval_policy: AskForApproval::Never,
            sandbox_policy: SandboxPolicy::DangerFullAccess,
            model: REMOTE_MODEL_SLUG.to_string(),
            effort: None,
            summary: ReasoningSummary::Auto,
        })
        .await?;
    // The exec must have been started by the unified-exec path, not the plain shell.
    let begin_event = wait_for_event_match(&codex, |msg| match msg {
        EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
        _ => None,
    })
    .await;
    assert_eq!(begin_event.source, ExecCommandSource::UnifiedExecStartup);
    wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
    Ok(())
}
/// Polls the manager's available-model list until a preset with `slug` shows
/// up, panicking if it has not appeared within two seconds.
async fn wait_for_model_available(manager: &Arc<ModelsManager>, slug: &str) -> ModelPreset {
    let deadline = Instant::now() + Duration::from_secs(2);
    loop {
        // Clone the match out so the read guard is dropped before awaiting.
        let found = manager
            .available_models
            .read()
            .await
            .iter()
            .find(|preset| preset.model == slug)
            .cloned();
        match found {
            Some(preset) => return preset,
            None if Instant::now() >= deadline => {
                panic!("timed out waiting for the remote model {slug} to appear")
            }
            None => sleep(Duration::from_millis(25)).await,
        }
    }
}

View file

@ -3,6 +3,7 @@ use std::collections::HashMap;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use strum::IntoEnumIterator;
use strum_macros::Display;
use strum_macros::EnumIter;
use ts_rs::TS;
@ -36,7 +37,7 @@ pub enum ReasoningEffort {
}
/// A reasoning effort option that can be surfaced for a model.
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)]
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq, Eq)]
pub struct ReasoningEffortPreset {
/// Effort level that the model supports.
pub effort: ReasoningEffort,
@ -123,7 +124,7 @@ pub struct ModelInfo {
#[serde(default)]
pub description: Option<String>,
pub default_reasoning_level: ReasoningEffort,
pub supported_reasoning_levels: Vec<ReasoningEffort>,
pub supported_reasoning_levels: Vec<ReasoningEffortPreset>,
pub shell_type: ConfigShellToolType,
#[serde(default = "default_visibility")]
pub visibility: ModelVisibility,
@ -132,6 +133,8 @@ pub struct ModelInfo {
pub supported_in_api: bool,
#[serde(default)]
pub priority: i32,
#[serde(default)]
pub upgrade: Option<String>,
}
/// Response wrapper for `/models`.
@ -149,22 +152,57 @@ impl From<ModelInfo> for ModelPreset {
fn from(info: ModelInfo) -> Self {
ModelPreset {
id: info.slug.clone(),
model: info.slug,
model: info.slug.clone(),
display_name: info.display_name,
description: info.description.unwrap_or_default(),
default_reasoning_effort: info.default_reasoning_level,
supported_reasoning_efforts: info
.supported_reasoning_levels
.into_iter()
.map(|level| ReasoningEffortPreset {
effort: level,
// todo: add description for each reasoning effort
description: level.to_string(),
})
.collect(),
supported_reasoning_efforts: info.supported_reasoning_levels.clone(),
is_default: false, // default is the highest priority available model
upgrade: None, // no upgrade available (todo: think about it)
upgrade: info.upgrade.as_ref().map(|upgrade_slug| ModelUpgrade {
id: upgrade_slug.clone(),
reasoning_effort_mapping: reasoning_effort_mapping_from_presets(
&info.supported_reasoning_levels,
),
migration_config_key: info.slug.clone(),
}),
show_in_picker: info.visibility == ModelVisibility::List,
}
}
}
/// Builds a mapping from every canonical `ReasoningEffort` to the closest
/// effort supported by the given presets, or `None` if no presets exist.
fn reasoning_effort_mapping_from_presets(
    presets: &[ReasoningEffortPreset],
) -> Option<HashMap<ReasoningEffort, ReasoningEffort>> {
    if presets.is_empty() {
        return None;
    }
    // Flatten the presets down to their effort levels once, then resolve each
    // canonical effort against that set.
    let supported: Vec<ReasoningEffort> = presets.iter().map(|preset| preset.effort).collect();
    let mapping = ReasoningEffort::iter()
        .map(|effort| (effort, nearest_effort(effort, &supported)))
        .collect();
    Some(mapping)
}
/// Total ordering of reasoning efforts, used to measure distance between an
/// arbitrary effort and the set a model actually supports.
fn effort_rank(effort: ReasoningEffort) -> i32 {
    match effort {
        ReasoningEffort::None => 0,
        ReasoningEffort::Minimal => 1,
        ReasoningEffort::Low => 2,
        ReasoningEffort::Medium => 3,
        ReasoningEffort::High => 4,
        ReasoningEffort::XHigh => 5,
    }
}
/// Returns the supported effort whose rank is closest to `target`'s, falling
/// back to `target` itself when `supported` is empty. On a distance tie the
/// earliest entry in `supported` wins (strict `<` keeps the first minimum,
/// matching `Iterator::min_by_key` semantics).
fn nearest_effort(target: ReasoningEffort, supported: &[ReasoningEffort]) -> ReasoningEffort {
    let target_rank = effort_rank(target);
    let mut best = Option::<ReasoningEffort>::None;
    let mut best_distance = i32::MAX;
    for &candidate in supported {
        let distance = (effort_rank(candidate) - target_rank).abs();
        if distance < best_distance {
            best_distance = distance;
            best = Some(candidate);
        }
    }
    best.unwrap_or(target)
}

View file

@ -1348,7 +1348,7 @@ pub struct ReviewLineRange {
pub end: u32,
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[derive(Debug, Clone, Copy, Display, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum ExecCommandSource {
Agent,