Add remote models feature flag (#7648)
# External (non-OpenAI) Pull Request Requirements Before opening this Pull Request, please read the dedicated "Contributing" markdown file or your PR may be closed: https://github.com/openai/codex/blob/main/docs/contributing.md If your PR conforms to our contribution guidelines, replace this text with a detailed and high quality description of your changes. Include a link to a bug report or enhancement request.
This commit is contained in:
parent
3c3d3d1adc
commit
53a486f7ea
10 changed files with 292 additions and 27 deletions
|
|
@ -181,12 +181,13 @@ mod tests {
|
|||
"display_name": "gpt-test",
|
||||
"description": "desc",
|
||||
"default_reasoning_level": "medium",
|
||||
"supported_reasoning_levels": ["low", "medium", "high"],
|
||||
"supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}, {"effort": "high", "description": "high"}],
|
||||
"shell_type": "shell_command",
|
||||
"visibility": "list",
|
||||
"minimal_client_version": [0, 99, 0],
|
||||
"supported_in_api": true,
|
||||
"priority": 1
|
||||
"priority": 1,
|
||||
"upgrade": null,
|
||||
}))
|
||||
.unwrap(),
|
||||
],
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ use codex_protocol::openai_models::ModelInfo;
|
|||
use codex_protocol::openai_models::ModelVisibility;
|
||||
use codex_protocol::openai_models::ModelsResponse;
|
||||
use codex_protocol::openai_models::ReasoningEffort;
|
||||
use codex_protocol::openai_models::ReasoningEffortPreset;
|
||||
use http::HeaderMap;
|
||||
use http::Method;
|
||||
use wiremock::Mock;
|
||||
|
|
@ -57,15 +58,25 @@ async fn models_client_hits_models_endpoint() {
|
|||
description: Some("desc".to_string()),
|
||||
default_reasoning_level: ReasoningEffort::Medium,
|
||||
supported_reasoning_levels: vec![
|
||||
ReasoningEffort::Low,
|
||||
ReasoningEffort::Medium,
|
||||
ReasoningEffort::High,
|
||||
ReasoningEffortPreset {
|
||||
effort: ReasoningEffort::Low,
|
||||
description: ReasoningEffort::Low.to_string(),
|
||||
},
|
||||
ReasoningEffortPreset {
|
||||
effort: ReasoningEffort::Medium,
|
||||
description: ReasoningEffort::Medium.to_string(),
|
||||
},
|
||||
ReasoningEffortPreset {
|
||||
effort: ReasoningEffort::High,
|
||||
description: ReasoningEffort::High.to_string(),
|
||||
},
|
||||
],
|
||||
shell_type: ConfigShellToolType::ShellCommand,
|
||||
visibility: ModelVisibility::List,
|
||||
minimal_client_version: ClientVersion(0, 1, 0),
|
||||
supported_in_api: true,
|
||||
priority: 1,
|
||||
upgrade: None,
|
||||
}],
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1470,6 +1470,16 @@ async fn submission_loop(sess: Arc<Session>, config: Arc<Config>, rx_sub: Receiv
|
|||
let mut previous_context: Option<Arc<TurnContext>> =
|
||||
Some(sess.new_turn(SessionSettingsUpdate::default()).await);
|
||||
|
||||
if config.features.enabled(Feature::RemoteModels)
|
||||
&& let Err(err) = sess
|
||||
.services
|
||||
.models_manager
|
||||
.refresh_available_models(&config.model_provider)
|
||||
.await
|
||||
{
|
||||
error!("failed to refresh available models: {err}");
|
||||
}
|
||||
|
||||
// To break out of this loop, send Op::Shutdown.
|
||||
while let Ok(sub) = rx_sub.recv().await {
|
||||
debug!(?sub, "Submission");
|
||||
|
|
|
|||
|
|
@ -54,6 +54,8 @@ pub enum Feature {
|
|||
WindowsSandbox,
|
||||
/// Remote compaction enabled (only for ChatGPT auth)
|
||||
RemoteCompaction,
|
||||
/// Refresh remote models and emit AppReady once the list is available.
|
||||
RemoteModels,
|
||||
/// Allow model to call multiple tools in parallel (only for models supporting it).
|
||||
ParallelToolCalls,
|
||||
/// Experimental skills injection (CLI flag-driven).
|
||||
|
|
@ -333,6 +335,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
|||
stage: Stage::Experimental,
|
||||
default_enabled: true,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::RemoteModels,
|
||||
key: "remote_models",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::ParallelToolCalls,
|
||||
key: "parallel",
|
||||
|
|
|
|||
|
|
@ -291,6 +291,7 @@ mod tests {
|
|||
use super::*;
|
||||
use codex_protocol::openai_models::ClientVersion;
|
||||
use codex_protocol::openai_models::ModelVisibility;
|
||||
use codex_protocol::openai_models::ReasoningEffortPreset;
|
||||
|
||||
fn remote(slug: &str, effort: ReasoningEffort, shell: ConfigShellToolType) -> ModelInfo {
|
||||
ModelInfo {
|
||||
|
|
@ -298,12 +299,16 @@ mod tests {
|
|||
display_name: slug.to_string(),
|
||||
description: Some(format!("{slug} desc")),
|
||||
default_reasoning_level: effort,
|
||||
supported_reasoning_levels: vec![effort],
|
||||
supported_reasoning_levels: vec![ReasoningEffortPreset {
|
||||
effort,
|
||||
description: effort.to_string(),
|
||||
}],
|
||||
shell_type: shell,
|
||||
visibility: ModelVisibility::List,
|
||||
minimal_client_version: ClientVersion(0, 1, 0),
|
||||
supported_in_api: true,
|
||||
priority: 1,
|
||||
upgrade: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@ impl ModelsManager {
|
|||
}
|
||||
}
|
||||
|
||||
// do not use this function yet. It's work in progress.
|
||||
pub async fn refresh_available_models(
|
||||
&self,
|
||||
provider: &ModelProviderInfo,
|
||||
|
|
@ -47,16 +46,21 @@ impl ModelsManager {
|
|||
let transport = ReqwestTransport::new(build_reqwest_client());
|
||||
let client = ModelsClient::new(transport, api_provider, api_auth);
|
||||
|
||||
let mut client_version = env!("CARGO_PKG_VERSION");
|
||||
if client_version == "0.0.0" {
|
||||
client_version = "99.99.99";
|
||||
}
|
||||
let response = client
|
||||
.list_models(env!("CARGO_PKG_VERSION"), HeaderMap::new())
|
||||
.list_models(client_version, HeaderMap::new())
|
||||
.await
|
||||
.map_err(map_api_error)?;
|
||||
|
||||
let models = response.models;
|
||||
*self.remote_models.write().await = models.clone();
|
||||
let available_models = self.build_available_models().await;
|
||||
{
|
||||
let mut available_models_guard = self.available_models.write().await;
|
||||
*available_models_guard = self.build_available_models().await;
|
||||
*available_models_guard = available_models;
|
||||
}
|
||||
Ok(models)
|
||||
}
|
||||
|
|
@ -75,8 +79,11 @@ impl ModelsManager {
|
|||
async fn build_available_models(&self) -> Vec<ModelPreset> {
|
||||
let mut available_models = self.remote_models.read().await.clone();
|
||||
available_models.sort_by(|a, b| b.priority.cmp(&a.priority));
|
||||
let mut model_presets: Vec<ModelPreset> =
|
||||
available_models.into_iter().map(Into::into).collect();
|
||||
let mut model_presets: Vec<ModelPreset> = available_models
|
||||
.into_iter()
|
||||
.map(Into::into)
|
||||
.filter(|preset: &ModelPreset| preset.show_in_picker)
|
||||
.collect();
|
||||
if let Some(default) = model_presets.first_mut() {
|
||||
default.is_default = true;
|
||||
}
|
||||
|
|
@ -103,12 +110,13 @@ mod tests {
|
|||
"display_name": display,
|
||||
"description": format!("{display} desc"),
|
||||
"default_reasoning_level": "medium",
|
||||
"supported_reasoning_levels": ["low", "medium"],
|
||||
"supported_reasoning_levels": [{"effort": "low", "description": "low"}, {"effort": "medium", "description": "medium"}],
|
||||
"shell_type": "shell_command",
|
||||
"visibility": "list",
|
||||
"minimal_client_version": [0, 1, 0],
|
||||
"supported_in_api": true,
|
||||
"priority": priority
|
||||
"priority": priority,
|
||||
"upgrade": null,
|
||||
}))
|
||||
.expect("valid model")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ mod otel;
|
|||
mod prompt_caching;
|
||||
mod quota_exceeded;
|
||||
mod read_file;
|
||||
mod remote_models;
|
||||
mod resume;
|
||||
mod review;
|
||||
mod rmcp_client;
|
||||
|
|
|
|||
183
codex-rs/core/tests/suite/remote_models.rs
Normal file
183
codex-rs/core/tests/suite/remote_models.rs
Normal file
|
|
@ -0,0 +1,183 @@
|
|||
#![cfg(not(target_os = "windows"))]
|
||||
// unified exec is not supported on Windows OS
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::openai_models::models_manager::ModelsManager;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
use codex_core::protocol::EventMsg;
|
||||
use codex_core::protocol::ExecCommandSource;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::SandboxPolicy;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::openai_models::ClientVersion;
|
||||
use codex_protocol::openai_models::ConfigShellToolType;
|
||||
use codex_protocol::openai_models::ModelInfo;
|
||||
use codex_protocol::openai_models::ModelPreset;
|
||||
use codex_protocol::openai_models::ModelVisibility;
|
||||
use codex_protocol::openai_models::ModelsResponse;
|
||||
use codex_protocol::openai_models::ReasoningEffort;
|
||||
use codex_protocol::openai_models::ReasoningEffortPreset;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
use core_test_support::responses::ev_function_call;
|
||||
use core_test_support::responses::ev_response_created;
|
||||
use core_test_support::responses::mount_models_once;
|
||||
use core_test_support::responses::mount_sse_sequence;
|
||||
use core_test_support::responses::sse;
|
||||
use core_test_support::skip_if_no_network;
|
||||
use core_test_support::skip_if_sandbox;
|
||||
use core_test_support::test_codex::TestCodex;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use core_test_support::wait_for_event;
|
||||
use core_test_support::wait_for_event_match;
|
||||
use serde_json::json;
|
||||
use tokio::time::Duration;
|
||||
use tokio::time::Instant;
|
||||
use tokio::time::sleep;
|
||||
use wiremock::BodyPrintLimit;
|
||||
use wiremock::MockServer;
|
||||
|
||||
const REMOTE_MODEL_SLUG: &str = "codex-test";
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = MockServer::builder()
|
||||
.body_print_limit(BodyPrintLimit::Limited(80_000))
|
||||
.start()
|
||||
.await;
|
||||
|
||||
let remote_model = ModelInfo {
|
||||
slug: REMOTE_MODEL_SLUG.to_string(),
|
||||
display_name: "Remote Test".to_string(),
|
||||
description: Some("A remote model that requires the test shell".to_string()),
|
||||
default_reasoning_level: ReasoningEffort::Medium,
|
||||
supported_reasoning_levels: vec![ReasoningEffortPreset {
|
||||
effort: ReasoningEffort::Medium,
|
||||
description: ReasoningEffort::Medium.to_string(),
|
||||
}],
|
||||
shell_type: ConfigShellToolType::UnifiedExec,
|
||||
visibility: ModelVisibility::List,
|
||||
minimal_client_version: ClientVersion(0, 1, 0),
|
||||
supported_in_api: true,
|
||||
priority: 1,
|
||||
upgrade: None,
|
||||
};
|
||||
|
||||
let models_mock = mount_models_once(
|
||||
&server,
|
||||
ModelsResponse {
|
||||
models: vec![remote_model],
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let mut builder = test_codex().with_config(|config| {
|
||||
config.features.enable(Feature::RemoteModels);
|
||||
config.model = "gpt-5.1".to_string();
|
||||
});
|
||||
|
||||
let TestCodex {
|
||||
codex,
|
||||
cwd,
|
||||
config,
|
||||
conversation_manager,
|
||||
..
|
||||
} = builder.build(&server).await?;
|
||||
|
||||
let models_manager = conversation_manager.get_models_manager();
|
||||
let available_model = wait_for_model_available(&models_manager, REMOTE_MODEL_SLUG).await;
|
||||
|
||||
assert_eq!(available_model.model, REMOTE_MODEL_SLUG);
|
||||
|
||||
let requests = models_mock.requests();
|
||||
assert_eq!(
|
||||
requests.len(),
|
||||
1,
|
||||
"expected a single /models refresh request for the remote models feature"
|
||||
);
|
||||
assert_eq!(requests[0].url.path(), "/v1/models");
|
||||
|
||||
let family = models_manager
|
||||
.construct_model_family(REMOTE_MODEL_SLUG, &config)
|
||||
.await;
|
||||
assert_eq!(family.shell_type, ConfigShellToolType::UnifiedExec);
|
||||
|
||||
codex
|
||||
.submit(Op::OverrideTurnContext {
|
||||
cwd: None,
|
||||
approval_policy: None,
|
||||
sandbox_policy: None,
|
||||
model: Some(REMOTE_MODEL_SLUG.to_string()),
|
||||
effort: None,
|
||||
summary: None,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let call_id = "call";
|
||||
let args = json!({
|
||||
"cmd": "/bin/echo call",
|
||||
"yield_time_ms": 250,
|
||||
});
|
||||
let responses = vec![
|
||||
sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_function_call(call_id, "exec_command", &serde_json::to_string(&args)?),
|
||||
ev_completed("resp-1"),
|
||||
]),
|
||||
sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_assistant_message("msg-1", "done"),
|
||||
ev_completed("resp-2"),
|
||||
]),
|
||||
];
|
||||
mount_sse_sequence(&server, responses).await;
|
||||
|
||||
codex
|
||||
.submit(Op::UserTurn {
|
||||
items: vec![UserInput::Text {
|
||||
text: "run call".into(),
|
||||
}],
|
||||
final_output_json_schema: None,
|
||||
cwd: cwd.path().to_path_buf(),
|
||||
approval_policy: AskForApproval::Never,
|
||||
sandbox_policy: SandboxPolicy::DangerFullAccess,
|
||||
model: REMOTE_MODEL_SLUG.to_string(),
|
||||
effort: None,
|
||||
summary: ReasoningSummary::Auto,
|
||||
})
|
||||
.await?;
|
||||
|
||||
let begin_event = wait_for_event_match(&codex, |msg| match msg {
|
||||
EventMsg::ExecCommandBegin(event) if event.call_id == call_id => Some(event.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.await;
|
||||
|
||||
assert_eq!(begin_event.source, ExecCommandSource::UnifiedExecStartup);
|
||||
|
||||
wait_for_event(&codex, |event| matches!(event, EventMsg::TaskComplete(_))).await;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn wait_for_model_available(manager: &Arc<ModelsManager>, slug: &str) -> ModelPreset {
|
||||
let deadline = Instant::now() + Duration::from_secs(2);
|
||||
loop {
|
||||
if let Some(model) = {
|
||||
let guard = manager.available_models.read().await;
|
||||
guard.iter().find(|model| model.model == slug).cloned()
|
||||
} {
|
||||
return model;
|
||||
}
|
||||
if Instant::now() >= deadline {
|
||||
panic!("timed out waiting for the remote model {slug} to appear");
|
||||
}
|
||||
sleep(Duration::from_millis(25)).await;
|
||||
}
|
||||
}
|
||||
|
|
@ -3,6 +3,7 @@ use std::collections::HashMap;
|
|||
use schemars::JsonSchema;
|
||||
use serde::Deserialize;
|
||||
use serde::Serialize;
|
||||
use strum::IntoEnumIterator;
|
||||
use strum_macros::Display;
|
||||
use strum_macros::EnumIter;
|
||||
use ts_rs::TS;
|
||||
|
|
@ -36,7 +37,7 @@ pub enum ReasoningEffort {
|
|||
}
|
||||
|
||||
/// A reasoning effort option that can be surfaced for a model.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq)]
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq, Eq)]
|
||||
pub struct ReasoningEffortPreset {
|
||||
/// Effort level that the model supports.
|
||||
pub effort: ReasoningEffort,
|
||||
|
|
@ -123,7 +124,7 @@ pub struct ModelInfo {
|
|||
#[serde(default)]
|
||||
pub description: Option<String>,
|
||||
pub default_reasoning_level: ReasoningEffort,
|
||||
pub supported_reasoning_levels: Vec<ReasoningEffort>,
|
||||
pub supported_reasoning_levels: Vec<ReasoningEffortPreset>,
|
||||
pub shell_type: ConfigShellToolType,
|
||||
#[serde(default = "default_visibility")]
|
||||
pub visibility: ModelVisibility,
|
||||
|
|
@ -132,6 +133,8 @@ pub struct ModelInfo {
|
|||
pub supported_in_api: bool,
|
||||
#[serde(default)]
|
||||
pub priority: i32,
|
||||
#[serde(default)]
|
||||
pub upgrade: Option<String>,
|
||||
}
|
||||
|
||||
/// Response wrapper for `/models`.
|
||||
|
|
@ -149,22 +152,57 @@ impl From<ModelInfo> for ModelPreset {
|
|||
fn from(info: ModelInfo) -> Self {
|
||||
ModelPreset {
|
||||
id: info.slug.clone(),
|
||||
model: info.slug,
|
||||
model: info.slug.clone(),
|
||||
display_name: info.display_name,
|
||||
description: info.description.unwrap_or_default(),
|
||||
default_reasoning_effort: info.default_reasoning_level,
|
||||
supported_reasoning_efforts: info
|
||||
.supported_reasoning_levels
|
||||
.into_iter()
|
||||
.map(|level| ReasoningEffortPreset {
|
||||
effort: level,
|
||||
// todo: add description for each reasoning effort
|
||||
description: level.to_string(),
|
||||
})
|
||||
.collect(),
|
||||
supported_reasoning_efforts: info.supported_reasoning_levels.clone(),
|
||||
is_default: false, // default is the highest priority available model
|
||||
upgrade: None, // no upgrade available (todo: think about it)
|
||||
upgrade: info.upgrade.as_ref().map(|upgrade_slug| ModelUpgrade {
|
||||
id: upgrade_slug.clone(),
|
||||
reasoning_effort_mapping: reasoning_effort_mapping_from_presets(
|
||||
&info.supported_reasoning_levels,
|
||||
),
|
||||
migration_config_key: info.slug.clone(),
|
||||
}),
|
||||
show_in_picker: info.visibility == ModelVisibility::List,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn reasoning_effort_mapping_from_presets(
|
||||
presets: &[ReasoningEffortPreset],
|
||||
) -> Option<HashMap<ReasoningEffort, ReasoningEffort>> {
|
||||
if presets.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Map every canonical effort to the closest supported effort for the new model.
|
||||
let supported: Vec<ReasoningEffort> = presets.iter().map(|p| p.effort).collect();
|
||||
let mut map = HashMap::new();
|
||||
for effort in ReasoningEffort::iter() {
|
||||
let nearest = nearest_effort(effort, &supported);
|
||||
map.insert(effort, nearest);
|
||||
}
|
||||
Some(map)
|
||||
}
|
||||
|
||||
fn effort_rank(effort: ReasoningEffort) -> i32 {
|
||||
match effort {
|
||||
ReasoningEffort::None => 0,
|
||||
ReasoningEffort::Minimal => 1,
|
||||
ReasoningEffort::Low => 2,
|
||||
ReasoningEffort::Medium => 3,
|
||||
ReasoningEffort::High => 4,
|
||||
ReasoningEffort::XHigh => 5,
|
||||
}
|
||||
}
|
||||
|
||||
fn nearest_effort(target: ReasoningEffort, supported: &[ReasoningEffort]) -> ReasoningEffort {
|
||||
let target_rank = effort_rank(target);
|
||||
supported
|
||||
.iter()
|
||||
.copied()
|
||||
.min_by_key(|candidate| (effort_rank(*candidate) - target_rank).abs())
|
||||
.unwrap_or(target)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1348,7 +1348,7 @@ pub struct ReviewLineRange {
|
|||
pub end: u32,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[derive(Debug, Clone, Copy, Display, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum ExecCommandSource {
|
||||
Agent,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue