Remote compact for API-key users (#7835)

pakrym-oai 2025-12-12 10:05:02 -08:00 committed by GitHub
parent 9429e8b219
commit b3ddd50eee
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 101 additions and 78 deletions

View file

@@ -74,7 +74,7 @@ impl<'a> ChatRequestBuilder<'a> {
ResponseItem::CustomToolCallOutput { .. } => {}
ResponseItem::WebSearchCall { .. } => {}
ResponseItem::GhostSnapshot { .. } => {}
- ResponseItem::CompactionSummary { .. } => {}
+ ResponseItem::Compaction { .. } => {}
}
}
@@ -303,7 +303,7 @@ impl<'a> ChatRequestBuilder<'a> {
ResponseItem::Reasoning { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::Other
- | ResponseItem::CompactionSummary { .. } => {
+ | ResponseItem::Compaction { .. } => {
continue;
}
}

View file

@@ -2175,7 +2175,10 @@ pub(crate) async fn run_task(
// as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
if token_limit_reached {
- if should_use_remote_compact_task(&sess) {
+ if should_use_remote_compact_task(
+     sess.as_ref(),
+     &turn_context.client.get_provider(),
+ ) {
run_inline_remote_auto_compact_task(sess.clone(), turn_context.clone())
.await;
} else {
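
For orientation, here is a minimal sketch of the decision this hunk rewires. Everything below is a simplified stand-in for illustration (the real code threads Session and TurnContext through async tasks); only the branch shape follows the diff: when the token limit is hit, an OpenAI provider with the feature enabled takes the remote compact path, and everything else falls back to local compaction.

struct Provider {
    name: String,
}

impl Provider {
    fn is_openai(&self) -> bool {
        self.name == "OpenAI"
    }
}

fn run_remote_compact() {
    println!("remote: server-side compaction request");
}

fn run_local_compact() {
    println!("local: summarization turn against the model");
}

fn on_token_limit(provider: &Provider, remote_compaction_enabled: bool) {
    // Mirrors the new gating: provider-based, not auth-mode-based.
    if provider.is_openai() && remote_compaction_enabled {
        run_remote_compact();
    } else {
        run_local_compact();
    }
}

fn main() {
    // An API-key user on the default OpenAI provider now reaches the remote path.
    on_token_limit(&Provider { name: "OpenAI".into() }, true);
}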

View file

@@ -1,5 +1,6 @@
use std::sync::Arc;
+ use crate::ModelProviderInfo;
use crate::Prompt;
use crate::client_common::ResponseEvent;
use crate::codex::Session;
@@ -18,7 +19,6 @@ use crate::truncate::TruncationPolicy;
use crate::truncate::approx_token_count;
use crate::truncate::truncate_text;
use crate::util::backoff;
- use codex_app_server_protocol::AuthMode;
use codex_protocol::items::TurnItem;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseInputItem;
@@ -32,13 +32,11 @@ pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt
pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;
- pub(crate) fn should_use_remote_compact_task(session: &Session) -> bool {
-     session
-         .services
-         .auth_manager
-         .auth()
-         .is_some_and(|auth| auth.mode == AuthMode::ChatGPT)
-         && session.enabled(Feature::RemoteCompaction)
+ pub(crate) fn should_use_remote_compact_task(
+     session: &Session,
+     provider: &ModelProviderInfo,
+ ) -> bool {
+     provider.is_openai() && session.enabled(Feature::RemoteCompaction)
}
pub(crate) async fn run_inline_auto_compact_task(
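
This is the heart of the commit: the predicate previously required a ChatGPT login, and now only requires that the active provider is OpenAI, which is what lets API-key users use remote compaction. A before/after sketch with simplified stand-ins (the real Session, auth manager, and Feature plumbing are more involved):

#[derive(PartialEq)]
enum AuthMode {
    ChatGPT,
    ApiKey,
}

struct Session {
    remote_compaction_enabled: bool,
}

struct ModelProviderInfo {
    name: String,
}

impl ModelProviderInfo {
    fn is_openai(&self) -> bool {
        self.name == "OpenAI"
    }
}

// Old shape (simplified): only ChatGPT-authenticated sessions qualified.
fn should_use_remote_old(auth: Option<AuthMode>, session: &Session) -> bool {
    auth == Some(AuthMode::ChatGPT) && session.remote_compaction_enabled
}

// New shape (simplified): any auth works as long as the provider is OpenAI.
fn should_use_remote_new(provider: &ModelProviderInfo, session: &Session) -> bool {
    provider.is_openai() && session.remote_compaction_enabled
}

fn main() {
    let session = Session { remote_compaction_enabled: true };
    let provider = ModelProviderInfo { name: "OpenAI".into() };
    assert!(!should_use_remote_old(Some(AuthMode::ApiKey), &session));
    assert!(should_use_remote_new(&provider, &session));
}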

View file

@@ -92,7 +92,7 @@ impl ContextManager {
encrypted_content: Some(content),
..
}
- | ResponseItem::CompactionSummary {
+ | ResponseItem::Compaction {
encrypted_content: content,
} => estimate_reasoning_length(content.len()) as i64,
item => {
@@ -258,7 +258,7 @@ impl ContextManager {
| ResponseItem::FunctionCall { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::CustomToolCall { .. }
- | ResponseItem::CompactionSummary { .. }
+ | ResponseItem::Compaction { .. }
| ResponseItem::GhostSnapshot { .. }
| ResponseItem::Other => item.clone(),
}
@@ -277,7 +277,7 @@ fn is_api_message(message: &ResponseItem) -> bool {
| ResponseItem::LocalShellCall { .. }
| ResponseItem::Reasoning { .. }
| ResponseItem::WebSearchCall { .. }
- | ResponseItem::CompactionSummary { .. } => true,
+ | ResponseItem::Compaction { .. } => true,
ResponseItem::GhostSnapshot { .. } => false,
ResponseItem::Other => false,
}
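
A side effect of the rename is that Compaction items keep being costed against the context window the same way encrypted reasoning payloads are: the encrypted string's byte length is fed into estimate_reasoning_length. The sketch below is only illustrative; the 4-bytes-per-token divisor is an assumed placeholder, not the real heuristic:

// Hypothetical stand-in for estimate_reasoning_length; the ratio is assumed.
fn estimate_reasoning_length(byte_len: usize) -> usize {
    byte_len / 4
}

fn main() {
    let encrypted_content = "ENCRYPTED_COMPACTION_SUMMARY";
    let tokens = estimate_reasoning_length(encrypted_content.len()) as i64;
    println!("compaction item costed at ~{tokens} tokens");
}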

View file

@@ -28,6 +28,8 @@ const MAX_STREAM_MAX_RETRIES: u64 = 100;
const MAX_REQUEST_MAX_RETRIES: u64 = 100;
pub const CHAT_WIRE_API_DEPRECATION_SUMMARY: &str = r#"Support for the "chat" wire API is deprecated and will soon be removed. Update your model provider definition in config.toml to use wire_api = "responses"."#;
+ const OPENAI_PROVIDER_NAME: &str = "OpenAI";
/// Wire protocol that the provider speaks. Most third-party services only
/// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
/// itself (and a handful of others) additionally expose the more modern
@@ -210,7 +212,7 @@ impl ModelProviderInfo {
}
pub fn create_openai_provider() -> ModelProviderInfo {
ModelProviderInfo {
- name: "OpenAI".into(),
+ name: OPENAI_PROVIDER_NAME.into(),
// Allow users to override the default OpenAI endpoint by
// exporting `OPENAI_BASE_URL`. This is useful when pointing
// Codex at a proxy, mock server, or Azure-style deployment
@@ -247,6 +249,10 @@ impl ModelProviderInfo {
requires_openai_auth: true,
}
}
+ pub fn is_openai(&self) -> bool {
+     self.name == OPENAI_PROVIDER_NAME
+ }
}
pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;
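
Worth noting: is_openai matches on the provider's display name only. A provider cloned from the built-in definition still qualifies when base_url is overridden (proxies, mocks, Azure-style deployments), while renaming it opts out, and that asymmetry is exactly what the test changes below exploit. A stand-in sketch:

const OPENAI_PROVIDER_NAME: &str = "OpenAI";

// Stand-in for the real ModelProviderInfo; only the name check matters here.
struct ModelProviderInfo {
    name: String,
    base_url: Option<String>,
}

impl ModelProviderInfo {
    fn is_openai(&self) -> bool {
        self.name == OPENAI_PROVIDER_NAME
    }
}

fn main() {
    // Overriding the endpoint alone does not change the gating decision...
    let proxied = ModelProviderInfo {
        name: OPENAI_PROVIDER_NAME.into(),
        base_url: Some("http://localhost:8080/v1".into()),
    };
    assert!(proxied.is_openai());
    // ...but renaming the provider does.
    let renamed = ModelProviderInfo {
        name: "OpenAI (test)".into(),
        base_url: None,
    };
    assert!(!renamed.is_openai());
}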

View file

@@ -28,7 +28,7 @@ pub(crate) fn should_persist_response_item(item: &ResponseItem) -> bool {
| ResponseItem::CustomToolCallOutput { .. }
| ResponseItem::WebSearchCall { .. }
| ResponseItem::GhostSnapshot { .. }
- | ResponseItem::CompactionSummary { .. } => true,
+ | ResponseItem::Compaction { .. } => true,
ResponseItem::Other => false,
}
}

View file

@@ -25,7 +25,10 @@ impl SessionTask for CompactTask {
_cancellation_token: CancellationToken,
) -> Option<String> {
let session = session.clone_session();
- if crate::compact::should_use_remote_compact_task(&session) {
+ if crate::compact::should_use_remote_compact_task(
+     session.as_ref(),
+     &ctx.client.get_provider(),
+ ) {
crate::compact_remote::run_remote_compact_task(session, ctx).await
} else {
crate::compact::run_compact_task(session, ctx, input).await

View file

@@ -38,6 +38,7 @@ use core_test_support::responses::sse_failed;
use core_test_support::responses::start_mock_server;
use pretty_assertions::assert_eq;
use serde_json::json;
+ use wiremock::MockServer;
// --- Test helpers -----------------------------------------------------------
pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";
@@ -100,6 +101,13 @@ fn json_fragment(text: &str) -> String {
.to_string()
}
+ fn non_openai_model_provider(server: &MockServer) -> ModelProviderInfo {
+     let mut provider = built_in_model_providers()["openai"].clone();
+     provider.name = "OpenAI (test)".into();
+     provider.base_url = Some(format!("{}/v1", server.uri()));
+     provider
+ }
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
async fn summarize_context_three_requests_and_instructions() {
skip_if_no_network!();
@@ -127,10 +135,7 @@ async fn summarize_context_three_requests_and_instructions() {
let request_log = mount_sse_sequence(&server, vec![sse1, sse2, sse3]).await;
// Build config pointing to the mock server and spawn Codex.
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
@@ -324,10 +329,7 @@ async fn manual_compact_uses_custom_prompt() {
let custom_prompt = "Use this compact prompt instead";
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
@@ -407,10 +409,7 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
]);
mount_sse_once(&server, sse_compact).await;
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
config.model_provider = model_provider;
@@ -467,7 +466,11 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {
let server = start_mock_server().await;
+ let non_openai_provider_name = non_openai_model_provider(&server).name;
let codex = test_codex()
+     .with_config(move |config| {
+         config.model_provider.name = non_openai_provider_name;
+     })
.build(&server)
.await
.expect("build codex")
@@ -1050,10 +1053,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
};
mount_sse_once_match(&server, fourth_matcher, sse4).await;
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
@@ -1299,10 +1299,7 @@ async fn auto_compact_persists_rollout_entries() {
};
mount_sse_once_match(&server, third_matcher, sse3).await;
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
@@ -1403,10 +1400,7 @@ async fn manual_compact_retries_after_context_window_error() {
)
.await;
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
@@ -1539,10 +1533,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
)
.await;
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
@@ -1743,10 +1734,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_
mount_sse_sequence(&server, vec![sse1, sse2, sse3, sse4, sse5, sse6]).await;
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
@@ -1856,10 +1844,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {
// We don't assert on the post-compact request, so no need to keep its mock.
mount_sse_once(&server, post_auto_compact_turn).await;
- let model_provider = ModelProviderInfo {
-     base_url: Some(format!("{}/v1", server.uri())),
-     ..built_in_model_providers()["openai"].clone()
- };
+ let model_provider = non_openai_model_provider(&server);
let home = TempDir::new().unwrap();
let mut config = load_default_config_for_test(&home);
@@ -1961,13 +1946,18 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
)
.await;
- let compacted_history = vec![codex_protocol::models::ResponseItem::Message {
-     id: None,
-     role: "assistant".to_string(),
-     content: vec![codex_protocol::models::ContentItem::OutputText {
-         text: "REMOTE_COMPACT_SUMMARY".to_string(),
-     }],
- }];
+ let compacted_history = vec![
+     codex_protocol::models::ResponseItem::Message {
+         id: None,
+         role: "assistant".to_string(),
+         content: vec![codex_protocol::models::ContentItem::OutputText {
+             text: "REMOTE_COMPACT_SUMMARY".to_string(),
+         }],
+     },
+     codex_protocol::models::ResponseItem::Compaction {
+         encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+     },
+ ];
let compact_mock =
mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;
@@ -2028,4 +2018,8 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
resume_body.contains("REMOTE_COMPACT_SUMMARY") || resume_body.contains(FINAL_REPLY),
"resume request should follow remote compact and use compacted history"
);
+ assert!(
+     resume_body.contains("ENCRYPTED_COMPACTION_SUMMARY"),
+     "resume request should include compaction summary item"
+ );
}
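
The recurring edit in this file is mechanical: because gating now keys off the provider name, every test meant to keep exercising the local compaction path swaps its hand-rolled provider for the non_openai_model_provider helper, which renames the cloned built-in so is_openai() returns false. A self-contained sketch of the pattern (in the real tests the provider comes from built_in_model_providers() and the URI from a wiremock server):

#[derive(Clone, Debug, PartialEq)]
struct ModelProviderInfo {
    name: String,
    base_url: Option<String>,
}

fn built_in_openai() -> ModelProviderInfo {
    ModelProviderInfo { name: "OpenAI".into(), base_url: None }
}

fn non_openai_model_provider(server_uri: &str) -> ModelProviderInfo {
    let mut provider = built_in_openai();
    // The rename is the point: is_openai() becomes false, so these tests
    // stay on the local compact path instead of the new remote one.
    provider.name = "OpenAI (test)".into();
    provider.base_url = Some(format!("{server_uri}/v1"));
    provider
}

fn main() {
    let provider = non_openai_model_provider("http://127.0.0.1:4010");
    assert_eq!(provider.name, "OpenAI (test)");
    assert_eq!(provider.base_url.as_deref(), Some("http://127.0.0.1:4010/v1"));
}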

View file

@@ -51,13 +51,18 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
)
.await;
- let compacted_history = vec![ResponseItem::Message {
-     id: None,
-     role: "user".to_string(),
-     content: vec![ContentItem::InputText {
-         text: "REMOTE_COMPACTED_SUMMARY".to_string(),
-     }],
- }];
+ let compacted_history = vec![
+     ResponseItem::Message {
+         id: None,
+         role: "user".to_string(),
+         content: vec![ContentItem::InputText {
+             text: "REMOTE_COMPACTED_SUMMARY".to_string(),
+         }],
+     },
+     ResponseItem::Compaction {
+         encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+     },
+ ];
let compact_mock = responses::mount_compact_json_once(
harness.server(),
serde_json::json!({ "output": compacted_history.clone() }),
@@ -120,6 +125,10 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"),
"expected follow-up request to use compacted history"
);
+ assert!(
+     follow_up_body.contains("ENCRYPTED_COMPACTION_SUMMARY"),
+     "expected follow-up request to include compaction summary item"
+ );
assert!(
!follow_up_body.contains("FIRST_REMOTE_REPLY"),
"expected follow-up request to drop pre-compaction assistant messages"
@@ -159,13 +168,18 @@ async fn remote_compact_runs_automatically() -> Result<()> {
)
.await;
- let compacted_history = vec![ResponseItem::Message {
-     id: None,
-     role: "user".to_string(),
-     content: vec![ContentItem::InputText {
-         text: "REMOTE_COMPACTED_SUMMARY".to_string(),
-     }],
- }];
+ let compacted_history = vec![
+     ResponseItem::Message {
+         id: None,
+         role: "user".to_string(),
+         content: vec![ContentItem::InputText {
+             text: "REMOTE_COMPACTED_SUMMARY".to_string(),
+         }],
+     },
+     ResponseItem::Compaction {
+         encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+     },
+ ];
let compact_mock = responses::mount_compact_json_once(
harness.server(),
serde_json::json!({ "output": compacted_history.clone() }),
@@ -190,6 +204,7 @@ async fn remote_compact_runs_automatically() -> Result<()> {
assert_eq!(compact_mock.requests().len(), 1);
let follow_up_body = responses_mock.single_request().body_json().to_string();
assert!(follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"));
+ assert!(follow_up_body.contains("ENCRYPTED_COMPACTION_SUMMARY"));
Ok(())
}
@@ -226,6 +241,9 @@ async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()>
text: "COMPACTED_USER_SUMMARY".to_string(),
}],
},
+ ResponseItem::Compaction {
+     encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+ },
ResponseItem::Message {
id: None,
role: "assistant".to_string(),

View file

@@ -857,6 +857,7 @@ async fn start_test_conversation(
model: Option<&str>,
) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
let model_provider = ModelProviderInfo {
+     name: "Non-OpenAI Model provider".into(),
base_url: Some(format!("{}/v1", server.uri())),
..built_in_model_providers()["openai"].clone()
};

View file

@@ -150,8 +150,8 @@ pub enum ResponseItem {
GhostSnapshot {
ghost_commit: GhostCommit,
},
- #[serde(alias = "compaction")]
- CompactionSummary {
+ #[serde(alias = "compaction_summary")]
+ Compaction {
encrypted_content: String,
},
#[serde(other)]
@@ -671,13 +671,13 @@ mod tests {
#[test]
fn deserializes_compaction_alias() -> Result<()> {
- let json = r#"{"type":"compaction","encrypted_content":"abc"}"#;
+ let json = r#"{"type":"compaction_summary","encrypted_content":"abc"}"#;
let item: ResponseItem = serde_json::from_str(json)?;
assert_eq!(
item,
- ResponseItem::CompactionSummary {
+ ResponseItem::Compaction {
encrypted_content: "abc".into(),
}
);
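
The serde change flips the canonical wire tag and its alias: the variant now serializes as "compaction" while still accepting the legacy "compaction_summary" tag on input, so old rollouts keep deserializing. A round-trip sketch with a simplified one-variant enum (the real ResponseItem is internally tagged on "type" like this, but has many more variants):

use serde::{Deserialize, Serialize};

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type", rename_all = "snake_case")]
enum Item {
    #[serde(alias = "compaction_summary")]
    Compaction { encrypted_content: String },
}

fn main() -> Result<(), serde_json::Error> {
    // Legacy payloads with the old tag still deserialize...
    let legacy: Item =
        serde_json::from_str(r#"{"type":"compaction_summary","encrypted_content":"abc"}"#)?;
    assert_eq!(legacy, Item::Compaction { encrypted_content: "abc".into() });
    // ...while new payloads go out with the new tag.
    let json = serde_json::to_string(&legacy)?;
    assert!(json.contains(r#""type":"compaction""#));
    Ok(())
}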