From 187924d7619fbbe8bc7b0c37710fe7d63dd80d19 Mon Sep 17 00:00:00 2001 From: Ahmed Ibrahim Date: Wed, 7 Jan 2026 13:06:20 -0800 Subject: [PATCH] Override truncation policy at model info level (#8856) We used to override truncation policy by comparing model info vs config value in context manager. A better way to do it is to construct model info using the config value --- codex-rs/core/src/codex.rs | 10 +- codex-rs/core/src/config/mod.rs | 1 - .../core/src/models_manager/model_info.rs | 15 +++ codex-rs/core/src/truncate.rs | 24 +--- codex-rs/core/tests/suite/mod.rs | 1 + .../core/tests/suite/model_info_overrides.rs | 32 ++++++ codex-rs/core/tests/suite/remote_models.rs | 105 +++++++++++++++++- 7 files changed, 155 insertions(+), 33 deletions(-) create mode 100644 codex-rs/core/tests/suite/model_info_overrides.rs diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 3251ed118..236a485c6 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -544,10 +544,7 @@ impl Session { final_output_json_schema: None, codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_policy: TruncationPolicy::new( - per_turn_config.as_ref(), - model_info.truncation_policy.into(), - ), + truncation_policy: model_info.truncation_policy.into(), } } @@ -2244,10 +2241,7 @@ async fn spawn_review_thread( final_output_json_schema: None, codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(), tool_call_gate: Arc::new(ReadinessFlag::new()), - truncation_policy: TruncationPolicy::new( - &per_turn_config, - model_info.truncation_policy.into(), - ), + truncation_policy: model_info.truncation_policy.into(), }; // Seed the child task with the review prompt as the initial user message. diff --git a/codex-rs/core/src/config/mod.rs b/codex-rs/core/src/config/mod.rs index 71b14973d..6099e1c29 100644 --- a/codex-rs/core/src/config/mod.rs +++ b/codex-rs/core/src/config/mod.rs @@ -268,7 +268,6 @@ pub struct Config { /// Additional filenames to try when looking for project-level docs. pub project_doc_fallback_filenames: Vec, - // todo(aibrahim): this should be used in the override model info /// Token budget applied when storing tool/function outputs in the context manager. pub tool_output_token_limit: Option, diff --git a/codex-rs/core/src/models_manager/model_info.rs b/codex-rs/core/src/models_manager/model_info.rs index f907237f1..6e46b9358 100644 --- a/codex-rs/core/src/models_manager/model_info.rs +++ b/codex-rs/core/src/models_manager/model_info.rs @@ -5,9 +5,11 @@ use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelVisibility; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::TruncationMode; use codex_protocol::openai_models::TruncationPolicyConfig; use crate::config::Config; +use crate::truncate::approx_bytes_for_tokens; use tracing::warn; const BASE_INSTRUCTIONS: &str = include_str!("../../prompt.md"); @@ -71,6 +73,19 @@ pub(crate) fn with_config_overrides(mut model: ModelInfo, config: &Config) -> Mo if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit { model.auto_compact_token_limit = Some(auto_compact_token_limit); } + if let Some(token_limit) = config.tool_output_token_limit { + model.truncation_policy = match model.truncation_policy.mode { + TruncationMode::Bytes => { + let byte_limit = + i64::try_from(approx_bytes_for_tokens(token_limit)).unwrap_or(i64::MAX); + TruncationPolicyConfig::bytes(byte_limit) + } + TruncationMode::Tokens => { + let limit = i64::try_from(token_limit).unwrap_or(i64::MAX); + TruncationPolicyConfig::tokens(limit) + } + }; + } model } diff --git a/codex-rs/core/src/truncate.rs b/codex-rs/core/src/truncate.rs index 7cced9970..8150b994d 100644 --- a/codex-rs/core/src/truncate.rs +++ b/codex-rs/core/src/truncate.rs @@ -2,7 +2,6 @@ //! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation //! used across the core crate. -use crate::config::Config; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::openai_models::TruncationMode; use codex_protocol::openai_models::TruncationPolicyConfig; @@ -47,27 +46,6 @@ impl TruncationPolicy { } } - pub fn new(config: &Config, truncation_policy: TruncationPolicy) -> Self { - let config_token_limit = config.tool_output_token_limit; - - match truncation_policy { - TruncationPolicy::Bytes(family_bytes) => { - if let Some(token_limit) = config_token_limit { - Self::Bytes(approx_bytes_for_tokens(token_limit)) - } else { - Self::Bytes(family_bytes) - } - } - TruncationPolicy::Tokens(family_tokens) => { - if let Some(token_limit) = config_token_limit { - Self::Tokens(token_limit) - } else { - Self::Tokens(family_tokens) - } - } - } - } - /// Returns a token budget derived from this policy. /// /// - For `Tokens`, this is the explicit token limit. @@ -313,7 +291,7 @@ pub(crate) fn approx_token_count(text: &str) -> usize { len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN } -fn approx_bytes_for_tokens(tokens: usize) -> usize { +pub(crate) fn approx_bytes_for_tokens(tokens: usize) -> usize { tokens.saturating_mul(APPROX_BYTES_PER_TOKEN) } diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index dde687a39..724264107 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -35,6 +35,7 @@ mod json_result; mod list_dir; mod list_models; mod live_cli; +mod model_info_overrides; mod model_overrides; mod model_tools; mod models_etag_responses; diff --git a/codex-rs/core/tests/suite/model_info_overrides.rs b/codex-rs/core/tests/suite/model_info_overrides.rs new file mode 100644 index 000000000..9d9c0feef --- /dev/null +++ b/codex-rs/core/tests/suite/model_info_overrides.rs @@ -0,0 +1,32 @@ +use codex_core::models_manager::manager::ModelsManager; +use codex_protocol::openai_models::TruncationPolicyConfig; +use core_test_support::load_default_config_for_test; +use pretty_assertions::assert_eq; +use tempfile::TempDir; + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn offline_model_info_without_tool_output_override() { + let codex_home = TempDir::new().expect("create temp dir"); + let config = load_default_config_for_test(&codex_home).await; + + let model_info = ModelsManager::construct_model_info_offline("gpt-5.1", &config); + + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::bytes(10_000) + ); +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn offline_model_info_with_tool_output_override() { + let codex_home = TempDir::new().expect("create temp dir"); + let mut config = load_default_config_for_test(&codex_home).await; + config.tool_output_token_limit = Some(123); + + let model_info = ModelsManager::construct_model_info_offline("gpt-5.1-codex", &config); + + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::tokens(123) + ); +} diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index 507af97f2..71a3106bc 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -186,6 +186,95 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { Ok(()) } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_models_truncation_policy_without_override_preserves_remote() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::builder() + .body_print_limit(BodyPrintLimit::Limited(80_000)) + .start() + .await; + + let slug = "codex-test-truncation-policy"; + let remote_model = test_remote_model_with_policy( + slug, + ModelVisibility::List, + 1, + TruncationPolicyConfig::bytes(12_000), + ); + mount_models_once( + &server, + ModelsResponse { + models: vec![remote_model], + }, + ) + .await; + + let harness = build_remote_models_harness(&server, |config| { + config.model = Some("gpt-5.1".to_string()); + }) + .await?; + + let models_manager = harness.thread_manager.get_models_manager(); + wait_for_model_available(&models_manager, slug, &harness.config).await; + + let model_info = models_manager + .construct_model_info(slug, &harness.config) + .await; + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::bytes(12_000) + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn remote_models_truncation_policy_with_tool_output_override() -> Result<()> { + skip_if_no_network!(Ok(())); + skip_if_sandbox!(Ok(())); + + let server = MockServer::builder() + .body_print_limit(BodyPrintLimit::Limited(80_000)) + .start() + .await; + + let slug = "codex-test-truncation-override"; + let remote_model = test_remote_model_with_policy( + slug, + ModelVisibility::List, + 1, + TruncationPolicyConfig::bytes(10_000), + ); + mount_models_once( + &server, + ModelsResponse { + models: vec![remote_model], + }, + ) + .await; + + let harness = build_remote_models_harness(&server, |config| { + config.model = Some("gpt-5.1".to_string()); + config.tool_output_token_limit = Some(50); + }) + .await?; + + let models_manager = harness.thread_manager.get_models_manager(); + wait_for_model_available(&models_manager, slug, &harness.config).await; + + let model_info = models_manager + .construct_model_info(slug, &harness.config) + .await; + assert_eq!( + model_info.truncation_policy, + TruncationPolicyConfig::bytes(200) + ); + + Ok(()) +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn remote_models_apply_remote_base_instructions() -> Result<()> { skip_if_no_network!(Ok(())); @@ -461,6 +550,20 @@ where } fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) -> ModelInfo { + test_remote_model_with_policy( + slug, + visibility, + priority, + TruncationPolicyConfig::bytes(10_000), + ) +} + +fn test_remote_model_with_policy( + slug: &str, + visibility: ModelVisibility, + priority: i32, + truncation_policy: TruncationPolicyConfig, +) -> ModelInfo { ModelInfo { slug: slug.to_string(), display_name: format!("{slug} display"), @@ -480,7 +583,7 @@ fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) -> support_verbosity: false, default_verbosity: None, apply_patch_tool_type: None, - truncation_policy: TruncationPolicyConfig::bytes(10_000), + truncation_policy, supports_parallel_tool_calls: false, context_window: Some(272_000), auto_compact_token_limit: None,