Set verbosity to low for 5.1 (#6568)

And improve test coverage
This commit is contained in:
pakrym-oai 2025-11-12 17:40:52 -08:00 committed by GitHub
parent e63ab0dd65
commit f97874093e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 150 additions and 2 deletions

View file

@ -215,7 +215,9 @@ impl ModelClient {
let input_with_instructions = prompt.get_formatted_input();
let verbosity = if self.config.model_family.support_verbosity {
self.config.model_verbosity
self.config
.model_verbosity
.or(self.config.model_family.default_verbosity)
} else {
if self.config.model_verbosity.is_some() {
warn!(

View file

@ -1,3 +1,5 @@
use codex_protocol::config_types::Verbosity;
use crate::config::types::ReasoningSummaryFormat;
use crate::tools::handlers::apply_patch::ApplyPatchToolType;
use crate::tools::spec::ConfigShellToolType;
@ -55,6 +57,9 @@ pub struct ModelFamily {
/// If the model family supports setting the verbosity level when using Responses API.
pub support_verbosity: bool,
// The default verbosity level for this model family when using Responses API.
pub default_verbosity: Option<Verbosity>,
/// Preferred shell tool type for this model family when features do not override it.
pub shell_type: ConfigShellToolType,
}
@ -78,7 +83,9 @@ macro_rules! model_family {
effective_context_window_percent: 95,
support_verbosity: false,
shell_type: ConfigShellToolType::Default,
default_verbosity: None,
};
// apply overrides
$(
mf.$key = $value;
@ -174,6 +181,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> {
supports_reasoning_summaries: true,
apply_patch_tool_type: Some(ApplyPatchToolType::Freeform),
support_verbosity: true,
default_verbosity: Some(Verbosity::Low),
base_instructions: GPT_5_1_INSTRUCTIONS.to_string(),
)
} else if slug.starts_with("gpt-5") {
@ -202,5 +210,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily {
effective_context_window_percent: 95,
support_verbosity: false,
shell_type: ConfigShellToolType::Default,
default_verbosity: None,
}
}

View file

@ -11,6 +11,7 @@ use codex_core::ModelProviderInfo;
use codex_core::built_in_model_providers;
use codex_core::config::Config;
use codex_core::features::Feature;
use codex_core::model_family::find_family_for_model;
use codex_core::protocol::AskForApproval;
use codex_core::protocol::EventMsg;
use codex_core::protocol::Op;
@ -41,6 +42,14 @@ impl TestCodexBuilder {
self
}
pub fn with_model(self, model: &str) -> Self {
let new_model = model.to_string();
self.with_config(move |config| {
config.model = new_model.clone();
config.model_family = find_family_for_model(&new_model).expect("model family");
})
}
pub async fn build(&mut self, server: &wiremock::MockServer) -> anyhow::Result<TestCodex> {
let home = Arc::new(TempDir::new()?);
self.build_with_home(server, home, None).await

View file

@ -21,6 +21,7 @@ use codex_core::protocol::Op;
use codex_core::protocol::SessionSource;
use codex_otel::otel_event_manager::OtelEventManager;
use codex_protocol::ConversationId;
use codex_protocol::config_types::Verbosity;
use codex_protocol::models::ReasoningItemContent;
use codex_protocol::models::ReasoningItemReasoningSummary;
use codex_protocol::models::WebSearchAction;
@ -624,6 +625,122 @@ async fn includes_user_instructions_message_in_request() {
assert_message_ends_with(&request_body["input"][1], "</environment_context>");
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn includes_default_verbosity_in_request() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock =
responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1"))
.await;
let TestCodex { codex, .. } = test_codex().with_model("gpt-5.1").build(&server).await?;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let request = resp_mock.single_request();
let request_body = request.body_json();
assert_eq!(
request_body
.get("text")
.and_then(|t| t.get("verbosity"))
.and_then(|v| v.as_str()),
Some("low")
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock =
responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1"))
.await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5-codex")
.with_config(|config| {
config.model_verbosity = Some(Verbosity::High);
})
.build(&server)
.await?;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let request = resp_mock.single_request();
let request_body = request.body_json();
assert!(
request_body
.get("text")
.and_then(|t| t.get("verbosity"))
.is_none()
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn configured_verbosity_is_sent() -> anyhow::Result<()> {
skip_if_no_network!(Ok(()));
let server = MockServer::start().await;
let resp_mock =
responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1"))
.await;
let TestCodex { codex, .. } = test_codex()
.with_model("gpt-5")
.with_config(|config| {
config.model_verbosity = Some(Verbosity::High);
})
.build(&server)
.await?;
codex
.submit(Op::UserInput {
items: vec![UserInput::Text {
text: "hello".into(),
}],
})
.await
.unwrap();
wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await;
let request = resp_mock.single_request();
let request_body = request.body_json();
assert_eq!(
request_body
.get("text")
.and_then(|t| t.get("verbosity"))
.and_then(|v| v.as_str()),
Some("high")
);
Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn includes_developer_instructions_message_in_request() {
skip_if_no_network!();

View file

@ -50,7 +50,18 @@ pub enum ReasoningSummary {
/// Controls output length/detail on GPT-5 models via the Responses API.
/// Serialized with lowercase values to match the OpenAI API.
#[derive(
Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display, JsonSchema, TS,
Hash,
Debug,
Serialize,
Deserialize,
Default,
Clone,
Copy,
PartialEq,
Eq,
Display,
JsonSchema,
TS,
)]
#[serde(rename_all = "lowercase")]
#[strum(serialize_all = "lowercase")]