From f97874093ecff6a444426cdae1ab170f660380b5 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Wed, 12 Nov 2025 17:40:52 -0800 Subject: [PATCH] Set verbosity to low for 5.1 (#6568) And improve test coverage --- codex-rs/core/src/client.rs | 4 +- codex-rs/core/src/model_family.rs | 9 ++ codex-rs/core/tests/common/test_codex.rs | 9 ++ codex-rs/core/tests/suite/client.rs | 117 +++++++++++++++++++++++ codex-rs/protocol/src/config_types.rs | 13 ++- 5 files changed, 150 insertions(+), 2 deletions(-) diff --git a/codex-rs/core/src/client.rs b/codex-rs/core/src/client.rs index 651d8ba49..685e355cd 100644 --- a/codex-rs/core/src/client.rs +++ b/codex-rs/core/src/client.rs @@ -215,7 +215,9 @@ impl ModelClient { let input_with_instructions = prompt.get_formatted_input(); let verbosity = if self.config.model_family.support_verbosity { - self.config.model_verbosity + self.config + .model_verbosity + .or(self.config.model_family.default_verbosity) } else { if self.config.model_verbosity.is_some() { warn!( diff --git a/codex-rs/core/src/model_family.rs b/codex-rs/core/src/model_family.rs index b7765d1fe..9e3b25370 100644 --- a/codex-rs/core/src/model_family.rs +++ b/codex-rs/core/src/model_family.rs @@ -1,3 +1,5 @@ +use codex_protocol::config_types::Verbosity; + use crate::config::types::ReasoningSummaryFormat; use crate::tools::handlers::apply_patch::ApplyPatchToolType; use crate::tools::spec::ConfigShellToolType; @@ -55,6 +57,9 @@ pub struct ModelFamily { /// If the model family supports setting the verbosity level when using Responses API. pub support_verbosity: bool, + // The default verbosity level for this model family when using Responses API. + pub default_verbosity: Option<Verbosity>, + /// Preferred shell tool type for this model family when features do not override it. pub shell_type: ConfigShellToolType, } @@ -78,7 +83,9 @@ macro_rules!
model_family { effective_context_window_percent: 95, support_verbosity: false, shell_type: ConfigShellToolType::Default, + default_verbosity: None, }; + // apply overrides $( mf.$key = $value; @@ -174,6 +181,7 @@ pub fn find_family_for_model(slug: &str) -> Option<ModelFamily> { supports_reasoning_summaries: true, apply_patch_tool_type: Some(ApplyPatchToolType::Freeform), support_verbosity: true, + default_verbosity: Some(Verbosity::Low), base_instructions: GPT_5_1_INSTRUCTIONS.to_string(), ) } else if slug.starts_with("gpt-5") { @@ -202,5 +210,6 @@ pub fn derive_default_model_family(model: &str) -> ModelFamily { effective_context_window_percent: 95, support_verbosity: false, shell_type: ConfigShellToolType::Default, + default_verbosity: None, } } diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index 0f9fdaae8..30c78d5be 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -11,6 +11,7 @@ use codex_core::ModelProviderInfo; use codex_core::built_in_model_providers; use codex_core::config::Config; use codex_core::features::Feature; +use codex_core::model_family::find_family_for_model; use codex_core::protocol::AskForApproval; use codex_core::protocol::EventMsg; use codex_core::protocol::Op; @@ -41,6 +42,14 @@ impl TestCodexBuilder { self } + pub fn with_model(self, model: &str) -> Self { + let new_model = model.to_string(); + self.with_config(move |config| { + config.model = new_model.clone(); + config.model_family = find_family_for_model(&new_model).expect("model family"); + }) + } + pub async fn build(&mut self, server: &wiremock::MockServer) -> anyhow::Result<TestCodex> { + let home = Arc::new(TempDir::new()?); self.build_with_home(server, home, None).await diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 3e97d988e..3184ca816 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -21,6 +21,7 @@ use
codex_core::protocol::Op; use codex_core::protocol::SessionSource; use codex_otel::otel_event_manager::OtelEventManager; use codex_protocol::ConversationId; +use codex_protocol::config_types::Verbosity; use codex_protocol::models::ReasoningItemContent; use codex_protocol::models::ReasoningItemReasoningSummary; use codex_protocol::models::WebSearchAction; @@ -624,6 +625,122 @@ async fn includes_user_instructions_message_in_request() { assert_message_ends_with(&request_body["input"][1], "</user_instructions>"); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn includes_default_verbosity_in_request() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + let server = MockServer::start().await; + + let resp_mock = + responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1")) + .await; + let TestCodex { codex, .. } = test_codex().with_model("gpt-5.1").build(&server).await?; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello".into(), + }], + }) + .await + .unwrap(); + + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + let request = resp_mock.single_request(); + let request_body = request.body_json(); + + assert_eq!( + request_body + .get("text") + .and_then(|t| t.get("verbosity")) + .and_then(|v| v.as_str()), + Some("low") + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn configured_verbosity_not_sent_for_models_without_support() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + let server = MockServer::start().await; + + let resp_mock = + responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1")) + .await; + let TestCodex { codex, ..
} = test_codex() + .with_model("gpt-5-codex") + .with_config(|config| { + config.model_verbosity = Some(Verbosity::High); + }) + .build(&server) + .await?; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello".into(), + }], + }) + .await + .unwrap(); + + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + let request = resp_mock.single_request(); + let request_body = request.body_json(); + + assert!( + request_body + .get("text") + .and_then(|t| t.get("verbosity")) + .is_none() + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn configured_verbosity_is_sent() -> anyhow::Result<()> { + skip_if_no_network!(Ok(())); + let server = MockServer::start().await; + + let resp_mock = + responses::mount_sse_once_match(&server, path("/v1/responses"), sse_completed("resp1")) + .await; + let TestCodex { codex, .. } = test_codex() + .with_model("gpt-5") + .with_config(|config| { + config.model_verbosity = Some(Verbosity::High); + }) + .build(&server) + .await?; + + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "hello".into(), + }], + }) + .await + .unwrap(); + + wait_for_event(&codex, |ev| matches!(ev, EventMsg::TaskComplete(_))).await; + + let request = resp_mock.single_request(); + let request_body = request.body_json(); + + assert_eq!( + request_body + .get("text") + .and_then(|t| t.get("verbosity")) + .and_then(|v| v.as_str()), + Some("high") + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn includes_developer_instructions_message_in_request() { skip_if_no_network!(); diff --git a/codex-rs/protocol/src/config_types.rs b/codex-rs/protocol/src/config_types.rs index 32950527b..8f950314d 100644 --- a/codex-rs/protocol/src/config_types.rs +++ b/codex-rs/protocol/src/config_types.rs @@ -50,7 +50,18 @@ pub enum ReasoningSummary { /// Controls output length/detail on GPT-5 models via the Responses API. 
/// Serialized with lowercase values to match the OpenAI API. #[derive( - Debug, Serialize, Deserialize, Default, Clone, Copy, PartialEq, Eq, Display, JsonSchema, TS, + Hash, + Debug, + Serialize, + Deserialize, + Default, + Clone, + Copy, + PartialEq, + Eq, + Display, + JsonSchema, + TS, )] #[serde(rename_all = "lowercase")] #[strum(serialize_all = "lowercase")]