Override truncation policy at model info level (#8856)

We used to override the truncation policy by comparing the model info
against the config value in the context manager. A better approach is to
construct the model info using the config value.
This commit is contained in:
Ahmed Ibrahim 2026-01-07 13:06:20 -08:00 committed by GitHub
parent 66450f0445
commit 187924d761
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 155 additions and 33 deletions

View file

@ -544,10 +544,7 @@ impl Session {
final_output_json_schema: None,
codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
truncation_policy: TruncationPolicy::new(
per_turn_config.as_ref(),
model_info.truncation_policy.into(),
),
truncation_policy: model_info.truncation_policy.into(),
}
}
@ -2244,10 +2241,7 @@ async fn spawn_review_thread(
final_output_json_schema: None,
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
tool_call_gate: Arc::new(ReadinessFlag::new()),
truncation_policy: TruncationPolicy::new(
&per_turn_config,
model_info.truncation_policy.into(),
),
truncation_policy: model_info.truncation_policy.into(),
};
// Seed the child task with the review prompt as the initial user message.

View file

@ -268,7 +268,6 @@ pub struct Config {
/// Additional filenames to try when looking for project-level docs.
pub project_doc_fallback_filenames: Vec<String>,
// TODO(aibrahim): use this value when building the model info override
/// Token budget applied when storing tool/function outputs in the context manager.
pub tool_output_token_limit: Option<usize>,

View file

@ -5,9 +5,11 @@ use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ModelVisibility;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::openai_models::ReasoningEffortPreset;
use codex_protocol::openai_models::TruncationMode;
use codex_protocol::openai_models::TruncationPolicyConfig;
use crate::config::Config;
use crate::truncate::approx_bytes_for_tokens;
use tracing::warn;
const BASE_INSTRUCTIONS: &str = include_str!("../../prompt.md");
@ -71,6 +73,19 @@ pub(crate) fn with_config_overrides(mut model: ModelInfo, config: &Config) -> Mo
if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit {
model.auto_compact_token_limit = Some(auto_compact_token_limit);
}
if let Some(token_limit) = config.tool_output_token_limit {
model.truncation_policy = match model.truncation_policy.mode {
TruncationMode::Bytes => {
let byte_limit =
i64::try_from(approx_bytes_for_tokens(token_limit)).unwrap_or(i64::MAX);
TruncationPolicyConfig::bytes(byte_limit)
}
TruncationMode::Tokens => {
let limit = i64::try_from(token_limit).unwrap_or(i64::MAX);
TruncationPolicyConfig::tokens(limit)
}
};
}
model
}

View file

@ -2,7 +2,6 @@
//! and suffix on UTF-8 boundaries, and helpers for line/token-based truncation
//! used across the core crate.
use crate::config::Config;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::openai_models::TruncationMode;
use codex_protocol::openai_models::TruncationPolicyConfig;
@ -47,27 +46,6 @@ impl TruncationPolicy {
}
}
/// Builds the effective truncation policy from the model family's default,
/// applying the user's `tool_output_token_limit` config override when set.
///
/// The family default's unit is preserved: a byte-based family policy stays
/// byte-based (the token override is converted to an approximate byte
/// budget), and a token-based family policy stays token-based.
pub fn new(config: &Config, truncation_policy: TruncationPolicy) -> Self {
// The user-configured override, if any, is always expressed in tokens.
let config_token_limit = config.tool_output_token_limit;
match truncation_policy {
TruncationPolicy::Bytes(family_bytes) => {
if let Some(token_limit) = config_token_limit {
// Convert the token override into an approximate byte budget.
Self::Bytes(approx_bytes_for_tokens(token_limit))
} else {
// No override: keep the family's byte budget.
Self::Bytes(family_bytes)
}
}
TruncationPolicy::Tokens(family_tokens) => {
if let Some(token_limit) = config_token_limit {
// Token override applies directly to a token-based policy.
Self::Tokens(token_limit)
} else {
// No override: keep the family's token budget.
Self::Tokens(family_tokens)
}
}
}
}
/// Returns a token budget derived from this policy.
///
/// - For `Tokens`, this is the explicit token limit.
@ -313,7 +291,7 @@ pub(crate) fn approx_token_count(text: &str) -> usize {
len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN
}
fn approx_bytes_for_tokens(tokens: usize) -> usize {
/// Converts a token budget into an approximate byte budget by multiplying by
/// `APPROX_BYTES_PER_TOKEN`, saturating at `usize::MAX` on overflow.
pub(crate) fn approx_bytes_for_tokens(tokens: usize) -> usize {
tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)
}

View file

@ -35,6 +35,7 @@ mod json_result;
mod list_dir;
mod list_models;
mod live_cli;
mod model_info_overrides;
mod model_overrides;
mod model_tools;
mod models_etag_responses;

View file

@ -0,0 +1,32 @@
use codex_core::models_manager::manager::ModelsManager;
use codex_protocol::openai_models::TruncationPolicyConfig;
use core_test_support::load_default_config_for_test;
use pretty_assertions::assert_eq;
use tempfile::TempDir;
/// With no `tool_output_token_limit` configured, the offline model info keeps
/// the model's built-in byte-based truncation policy untouched.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn offline_model_info_without_tool_output_override() {
    let home = TempDir::new().expect("create temp dir");
    let config = load_default_config_for_test(&home).await;

    let info = ModelsManager::construct_model_info_offline("gpt-5.1", &config);

    let expected = TruncationPolicyConfig::bytes(10_000);
    assert_eq!(info.truncation_policy, expected);
}
/// Setting `tool_output_token_limit` in the config replaces the model's
/// default truncation policy with one derived from the configured limit.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn offline_model_info_with_tool_output_override() {
    let home = TempDir::new().expect("create temp dir");
    let mut config = load_default_config_for_test(&home).await;
    config.tool_output_token_limit = Some(123);

    let info = ModelsManager::construct_model_info_offline("gpt-5.1-codex", &config);

    let expected = TruncationPolicyConfig::tokens(123);
    assert_eq!(info.truncation_policy, expected);
}

View file

@ -186,6 +186,95 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
Ok(())
}
/// A remote model's byte-based truncation policy is served unchanged when the
/// user has not configured `tool_output_token_limit`.
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_truncation_policy_without_override_preserves_remote() -> Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));

    let server = MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
        .start()
        .await;

    // Publish a single remote model carrying a 12 KB byte-based policy.
    let slug = "codex-test-truncation-policy";
    let response = ModelsResponse {
        models: vec![test_remote_model_with_policy(
            slug,
            ModelVisibility::List,
            1,
            TruncationPolicyConfig::bytes(12_000),
        )],
    };
    mount_models_once(&server, response).await;

    let harness = build_remote_models_harness(&server, |config| {
        config.model = Some("gpt-5.1".to_string());
    })
    .await?;
    let manager = harness.thread_manager.get_models_manager();
    wait_for_model_available(&manager, slug, &harness.config).await;

    let info = manager.construct_model_info(slug, &harness.config).await;
    assert_eq!(
        info.truncation_policy,
        TruncationPolicyConfig::bytes(12_000)
    );
    Ok(())
}
/// With `tool_output_token_limit` configured, a remote model's byte-based
/// policy is overridden by the config value converted into a byte budget
/// (50 tokens -> 200 bytes).
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_truncation_policy_with_tool_output_override() -> Result<()> {
    skip_if_no_network!(Ok(()));
    skip_if_sandbox!(Ok(()));

    let server = MockServer::builder()
        .body_print_limit(BodyPrintLimit::Limited(80_000))
        .start()
        .await;

    // Publish a single remote model carrying the default 10 KB byte policy.
    let slug = "codex-test-truncation-override";
    let response = ModelsResponse {
        models: vec![test_remote_model_with_policy(
            slug,
            ModelVisibility::List,
            1,
            TruncationPolicyConfig::bytes(10_000),
        )],
    };
    mount_models_once(&server, response).await;

    let harness = build_remote_models_harness(&server, |config| {
        config.model = Some("gpt-5.1".to_string());
        config.tool_output_token_limit = Some(50);
    })
    .await?;
    let manager = harness.thread_manager.get_models_manager();
    wait_for_model_available(&manager, slug, &harness.config).await;

    let info = manager.construct_model_info(slug, &harness.config).await;
    assert_eq!(info.truncation_policy, TruncationPolicyConfig::bytes(200));
    Ok(())
}
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn remote_models_apply_remote_base_instructions() -> Result<()> {
skip_if_no_network!(Ok(()));
@ -461,6 +550,20 @@ where
}
/// Convenience wrapper around `test_remote_model_with_policy` that supplies
/// the default 10 KB byte-based truncation policy.
fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) -> ModelInfo {
    let default_policy = TruncationPolicyConfig::bytes(10_000);
    test_remote_model_with_policy(slug, visibility, priority, default_policy)
}
fn test_remote_model_with_policy(
slug: &str,
visibility: ModelVisibility,
priority: i32,
truncation_policy: TruncationPolicyConfig,
) -> ModelInfo {
ModelInfo {
slug: slug.to_string(),
display_name: format!("{slug} display"),
@ -480,7 +583,7 @@ fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) ->
support_verbosity: false,
default_verbosity: None,
apply_patch_tool_type: None,
truncation_policy: TruncationPolicyConfig::bytes(10_000),
truncation_policy,
supports_parallel_tool_calls: false,
context_window: Some(272_000),
auto_compact_token_limit: None,