Override truncation policy at model info level (#8856)
We used to override the truncation policy by comparing the model info against the config value in the context manager. A better way to do it is to construct the model info using the config value.
This commit is contained in:
parent
66450f0445
commit
187924d761
7 changed files with 155 additions and 33 deletions
|
|
@ -544,10 +544,7 @@ impl Session {
|
|||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: per_turn_config.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
truncation_policy: TruncationPolicy::new(
|
||||
per_turn_config.as_ref(),
|
||||
model_info.truncation_policy.into(),
|
||||
),
|
||||
truncation_policy: model_info.truncation_policy.into(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -2244,10 +2241,7 @@ async fn spawn_review_thread(
|
|||
final_output_json_schema: None,
|
||||
codex_linux_sandbox_exe: parent_turn_context.codex_linux_sandbox_exe.clone(),
|
||||
tool_call_gate: Arc::new(ReadinessFlag::new()),
|
||||
truncation_policy: TruncationPolicy::new(
|
||||
&per_turn_config,
|
||||
model_info.truncation_policy.into(),
|
||||
),
|
||||
truncation_policy: model_info.truncation_policy.into(),
|
||||
};
|
||||
|
||||
// Seed the child task with the review prompt as the initial user message.
|
||||
|
|
|
|||
|
|
@ -268,7 +268,6 @@ pub struct Config {
|
|||
/// Additional filenames to try when looking for project-level docs.
|
||||
pub project_doc_fallback_filenames: Vec<String>,
|
||||
|
||||
// todo(aibrahim): this should be used in the override model info
|
||||
/// Token budget applied when storing tool/function outputs in the context manager.
|
||||
pub tool_output_token_limit: Option<usize>,
|
||||
|
||||
|
|
|
|||
|
|
@ -5,9 +5,11 @@ use codex_protocol::openai_models::ModelInfo;
|
|||
use codex_protocol::openai_models::ModelVisibility;
|
||||
use codex_protocol::openai_models::ReasoningEffort;
|
||||
use codex_protocol::openai_models::ReasoningEffortPreset;
|
||||
use codex_protocol::openai_models::TruncationMode;
|
||||
use codex_protocol::openai_models::TruncationPolicyConfig;
|
||||
|
||||
use crate::config::Config;
|
||||
use crate::truncate::approx_bytes_for_tokens;
|
||||
use tracing::warn;
|
||||
|
||||
const BASE_INSTRUCTIONS: &str = include_str!("../../prompt.md");
|
||||
|
|
@ -71,6 +73,19 @@ pub(crate) fn with_config_overrides(mut model: ModelInfo, config: &Config) -> Mo
|
|||
if let Some(auto_compact_token_limit) = config.model_auto_compact_token_limit {
|
||||
model.auto_compact_token_limit = Some(auto_compact_token_limit);
|
||||
}
|
||||
if let Some(token_limit) = config.tool_output_token_limit {
|
||||
model.truncation_policy = match model.truncation_policy.mode {
|
||||
TruncationMode::Bytes => {
|
||||
let byte_limit =
|
||||
i64::try_from(approx_bytes_for_tokens(token_limit)).unwrap_or(i64::MAX);
|
||||
TruncationPolicyConfig::bytes(byte_limit)
|
||||
}
|
||||
TruncationMode::Tokens => {
|
||||
let limit = i64::try_from(token_limit).unwrap_or(i64::MAX);
|
||||
TruncationPolicyConfig::tokens(limit)
|
||||
}
|
||||
};
|
||||
}
|
||||
model
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
//! and suffix on UTF-8 boundaries, and helpers for line/token‑based truncation
|
||||
//! used across the core crate.
|
||||
|
||||
use crate::config::Config;
|
||||
use codex_protocol::models::FunctionCallOutputContentItem;
|
||||
use codex_protocol::openai_models::TruncationMode;
|
||||
use codex_protocol::openai_models::TruncationPolicyConfig;
|
||||
|
|
@ -47,27 +46,6 @@ impl TruncationPolicy {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn new(config: &Config, truncation_policy: TruncationPolicy) -> Self {
|
||||
let config_token_limit = config.tool_output_token_limit;
|
||||
|
||||
match truncation_policy {
|
||||
TruncationPolicy::Bytes(family_bytes) => {
|
||||
if let Some(token_limit) = config_token_limit {
|
||||
Self::Bytes(approx_bytes_for_tokens(token_limit))
|
||||
} else {
|
||||
Self::Bytes(family_bytes)
|
||||
}
|
||||
}
|
||||
TruncationPolicy::Tokens(family_tokens) => {
|
||||
if let Some(token_limit) = config_token_limit {
|
||||
Self::Tokens(token_limit)
|
||||
} else {
|
||||
Self::Tokens(family_tokens)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a token budget derived from this policy.
|
||||
///
|
||||
/// - For `Tokens`, this is the explicit token limit.
|
||||
|
|
@ -313,7 +291,7 @@ pub(crate) fn approx_token_count(text: &str) -> usize {
|
|||
len.saturating_add(APPROX_BYTES_PER_TOKEN.saturating_sub(1)) / APPROX_BYTES_PER_TOKEN
|
||||
}
|
||||
|
||||
fn approx_bytes_for_tokens(tokens: usize) -> usize {
|
||||
pub(crate) fn approx_bytes_for_tokens(tokens: usize) -> usize {
|
||||
tokens.saturating_mul(APPROX_BYTES_PER_TOKEN)
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ mod json_result;
|
|||
mod list_dir;
|
||||
mod list_models;
|
||||
mod live_cli;
|
||||
mod model_info_overrides;
|
||||
mod model_overrides;
|
||||
mod model_tools;
|
||||
mod models_etag_responses;
|
||||
|
|
|
|||
32
codex-rs/core/tests/suite/model_info_overrides.rs
Normal file
32
codex-rs/core/tests/suite/model_info_overrides.rs
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
use codex_core::models_manager::manager::ModelsManager;
|
||||
use codex_protocol::openai_models::TruncationPolicyConfig;
|
||||
use core_test_support::load_default_config_for_test;
|
||||
use pretty_assertions::assert_eq;
|
||||
use tempfile::TempDir;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn offline_model_info_without_tool_output_override() {
|
||||
let codex_home = TempDir::new().expect("create temp dir");
|
||||
let config = load_default_config_for_test(&codex_home).await;
|
||||
|
||||
let model_info = ModelsManager::construct_model_info_offline("gpt-5.1", &config);
|
||||
|
||||
assert_eq!(
|
||||
model_info.truncation_policy,
|
||||
TruncationPolicyConfig::bytes(10_000)
|
||||
);
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn offline_model_info_with_tool_output_override() {
|
||||
let codex_home = TempDir::new().expect("create temp dir");
|
||||
let mut config = load_default_config_for_test(&codex_home).await;
|
||||
config.tool_output_token_limit = Some(123);
|
||||
|
||||
let model_info = ModelsManager::construct_model_info_offline("gpt-5.1-codex", &config);
|
||||
|
||||
assert_eq!(
|
||||
model_info.truncation_policy,
|
||||
TruncationPolicyConfig::tokens(123)
|
||||
);
|
||||
}
|
||||
|
|
@ -186,6 +186,95 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_models_truncation_policy_without_override_preserves_remote() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = MockServer::builder()
|
||||
.body_print_limit(BodyPrintLimit::Limited(80_000))
|
||||
.start()
|
||||
.await;
|
||||
|
||||
let slug = "codex-test-truncation-policy";
|
||||
let remote_model = test_remote_model_with_policy(
|
||||
slug,
|
||||
ModelVisibility::List,
|
||||
1,
|
||||
TruncationPolicyConfig::bytes(12_000),
|
||||
);
|
||||
mount_models_once(
|
||||
&server,
|
||||
ModelsResponse {
|
||||
models: vec![remote_model],
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let harness = build_remote_models_harness(&server, |config| {
|
||||
config.model = Some("gpt-5.1".to_string());
|
||||
})
|
||||
.await?;
|
||||
|
||||
let models_manager = harness.thread_manager.get_models_manager();
|
||||
wait_for_model_available(&models_manager, slug, &harness.config).await;
|
||||
|
||||
let model_info = models_manager
|
||||
.construct_model_info(slug, &harness.config)
|
||||
.await;
|
||||
assert_eq!(
|
||||
model_info.truncation_policy,
|
||||
TruncationPolicyConfig::bytes(12_000)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_models_truncation_policy_with_tool_output_override() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
skip_if_sandbox!(Ok(()));
|
||||
|
||||
let server = MockServer::builder()
|
||||
.body_print_limit(BodyPrintLimit::Limited(80_000))
|
||||
.start()
|
||||
.await;
|
||||
|
||||
let slug = "codex-test-truncation-override";
|
||||
let remote_model = test_remote_model_with_policy(
|
||||
slug,
|
||||
ModelVisibility::List,
|
||||
1,
|
||||
TruncationPolicyConfig::bytes(10_000),
|
||||
);
|
||||
mount_models_once(
|
||||
&server,
|
||||
ModelsResponse {
|
||||
models: vec![remote_model],
|
||||
},
|
||||
)
|
||||
.await;
|
||||
|
||||
let harness = build_remote_models_harness(&server, |config| {
|
||||
config.model = Some("gpt-5.1".to_string());
|
||||
config.tool_output_token_limit = Some(50);
|
||||
})
|
||||
.await?;
|
||||
|
||||
let models_manager = harness.thread_manager.get_models_manager();
|
||||
wait_for_model_available(&models_manager, slug, &harness.config).await;
|
||||
|
||||
let model_info = models_manager
|
||||
.construct_model_info(slug, &harness.config)
|
||||
.await;
|
||||
assert_eq!(
|
||||
model_info.truncation_policy,
|
||||
TruncationPolicyConfig::bytes(200)
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn remote_models_apply_remote_base_instructions() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
@ -461,6 +550,20 @@ where
|
|||
}
|
||||
|
||||
fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) -> ModelInfo {
|
||||
test_remote_model_with_policy(
|
||||
slug,
|
||||
visibility,
|
||||
priority,
|
||||
TruncationPolicyConfig::bytes(10_000),
|
||||
)
|
||||
}
|
||||
|
||||
fn test_remote_model_with_policy(
|
||||
slug: &str,
|
||||
visibility: ModelVisibility,
|
||||
priority: i32,
|
||||
truncation_policy: TruncationPolicyConfig,
|
||||
) -> ModelInfo {
|
||||
ModelInfo {
|
||||
slug: slug.to_string(),
|
||||
display_name: format!("{slug} display"),
|
||||
|
|
@ -480,7 +583,7 @@ fn test_remote_model(slug: &str, visibility: ModelVisibility, priority: i32) ->
|
|||
support_verbosity: false,
|
||||
default_verbosity: None,
|
||||
apply_patch_tool_type: None,
|
||||
truncation_policy: TruncationPolicyConfig::bytes(10_000),
|
||||
truncation_policy,
|
||||
supports_parallel_tool_calls: false,
|
||||
context_window: Some(272_000),
|
||||
auto_compact_token_limit: None,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue