Remote compact for API-key users (#7835)

parent 9429e8b219
commit b3ddd50eee

11 changed files with 101 additions and 78 deletions
@@ -74,7 +74,7 @@ impl<'a> ChatRequestBuilder<'a> {
             ResponseItem::CustomToolCallOutput { .. } => {}
             ResponseItem::WebSearchCall { .. } => {}
             ResponseItem::GhostSnapshot { .. } => {}
-            ResponseItem::CompactionSummary { .. } => {}
+            ResponseItem::Compaction { .. } => {}
         }
     }

@@ -303,7 +303,7 @@ impl<'a> ChatRequestBuilder<'a> {
                 ResponseItem::Reasoning { .. }
                 | ResponseItem::WebSearchCall { .. }
                 | ResponseItem::Other
-                | ResponseItem::CompactionSummary { .. } => {
+                | ResponseItem::Compaction { .. } => {
                     continue;
                 }
             }

@@ -2175,7 +2175,10 @@ pub(crate) async fn run_task(

         // as long as compaction works well in getting us way below the token limit, we shouldn't worry about being in an infinite loop.
         if token_limit_reached {
-            if should_use_remote_compact_task(&sess) {
+            if should_use_remote_compact_task(
+                sess.as_ref(),
+                &turn_context.client.get_provider(),
+            ) {
                 run_inline_remote_auto_compact_task(sess.clone(), turn_context.clone())
                     .await;
             } else {

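Note: the auto-compact call site now passes the active model provider into the check instead of only the session, so the remote-vs-local decision is made per provider; the rewritten `should_use_remote_compact_task` appears a few hunks below.
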
@@ -1,5 +1,6 @@
 use std::sync::Arc;

+use crate::ModelProviderInfo;
 use crate::Prompt;
 use crate::client_common::ResponseEvent;
 use crate::codex::Session;

@@ -18,7 +19,6 @@ use crate::truncate::TruncationPolicy;
 use crate::truncate::approx_token_count;
 use crate::truncate::truncate_text;
 use crate::util::backoff;
-use codex_app_server_protocol::AuthMode;
 use codex_protocol::items::TurnItem;
 use codex_protocol::models::ContentItem;
 use codex_protocol::models::ResponseInputItem;

@@ -32,13 +32,11 @@ pub const SUMMARIZATION_PROMPT: &str = include_str!("../templates/compact/prompt
 pub const SUMMARY_PREFIX: &str = include_str!("../templates/compact/summary_prefix.md");
 const COMPACT_USER_MESSAGE_MAX_TOKENS: usize = 20_000;

-pub(crate) fn should_use_remote_compact_task(session: &Session) -> bool {
-    session
-        .services
-        .auth_manager
-        .auth()
-        .is_some_and(|auth| auth.mode == AuthMode::ChatGPT)
-        && session.enabled(Feature::RemoteCompaction)
+pub(crate) fn should_use_remote_compact_task(
+    session: &Session,
+    provider: &ModelProviderInfo,
+) -> bool {
+    provider.is_openai() && session.enabled(Feature::RemoteCompaction)
 }

 pub(crate) async fn run_inline_auto_compact_task(

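This is the core of the change: the old gate required ChatGPT-mode auth (`AuthMode::ChatGPT`), so API-key users always fell back to local compaction. The new gate only asks whether the active provider is the built-in OpenAI one, so API-key users with `Feature::RemoteCompaction` enabled now take the remote path too, which is what the PR title refers to.
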
@@ -92,7 +92,7 @@ impl ContextManager {
             encrypted_content: Some(content),
             ..
         }
-        | ResponseItem::CompactionSummary {
+        | ResponseItem::Compaction {
             encrypted_content: content,
         } => estimate_reasoning_length(content.len()) as i64,
         item => {

@@ -258,7 +258,7 @@ impl ContextManager {
             | ResponseItem::FunctionCall { .. }
             | ResponseItem::WebSearchCall { .. }
             | ResponseItem::CustomToolCall { .. }
-            | ResponseItem::CompactionSummary { .. }
+            | ResponseItem::Compaction { .. }
             | ResponseItem::GhostSnapshot { .. }
             | ResponseItem::Other => item.clone(),
         }

@@ -277,7 +277,7 @@ fn is_api_message(message: &ResponseItem) -> bool {
         | ResponseItem::LocalShellCall { .. }
         | ResponseItem::Reasoning { .. }
         | ResponseItem::WebSearchCall { .. }
-        | ResponseItem::CompactionSummary { .. } => true,
+        | ResponseItem::Compaction { .. } => true,
         ResponseItem::GhostSnapshot { .. } => false,
         ResponseItem::Other => false,
     }

@@ -28,6 +28,8 @@ const MAX_STREAM_MAX_RETRIES: u64 = 100;
 const MAX_REQUEST_MAX_RETRIES: u64 = 100;
 pub const CHAT_WIRE_API_DEPRECATION_SUMMARY: &str = r#"Support for the "chat" wire API is deprecated and will soon be removed. Update your model provider definition in config.toml to use wire_api = "responses"."#;

+const OPENAI_PROVIDER_NAME: &str = "OpenAI";
+
 /// Wire protocol that the provider speaks. Most third-party services only
 /// implement the classic OpenAI Chat Completions JSON schema, whereas OpenAI
 /// itself (and a handful of others) additionally expose the more modern

@@ -210,7 +212,7 @@ impl ModelProviderInfo {
     }
     pub fn create_openai_provider() -> ModelProviderInfo {
         ModelProviderInfo {
-            name: "OpenAI".into(),
+            name: OPENAI_PROVIDER_NAME.into(),
             // Allow users to override the default OpenAI endpoint by
             // exporting `OPENAI_BASE_URL`. This is useful when pointing
             // Codex at a proxy, mock server, or Azure-style deployment

@@ -247,6 +249,10 @@ impl ModelProviderInfo {
             requires_openai_auth: true,
         }
     }
+
+    pub fn is_openai(&self) -> bool {
+        self.name == OPENAI_PROVIDER_NAME
+    }
 }

 pub const DEFAULT_LMSTUDIO_PORT: u16 = 1234;

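A quick illustration of the name-based check (a sketch, not part of the diff, built only from the two APIs in this file): cloning the built-in provider keeps `is_openai()` true, while renaming it, as the test helper later in this diff does, turns it off.

    let mut provider = ModelProviderInfo::create_openai_provider();
    assert!(provider.is_openai());           // name is "OpenAI"
    provider.name = "OpenAI (test)".into();  // any other name...
    assert!(!provider.is_openai());          // ...no longer counts as the OpenAI provider
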
@@ -28,7 +28,7 @@ pub(crate) fn should_persist_response_item(item: &ResponseItem) -> bool {
         | ResponseItem::CustomToolCallOutput { .. }
         | ResponseItem::WebSearchCall { .. }
         | ResponseItem::GhostSnapshot { .. }
-        | ResponseItem::CompactionSummary { .. } => true,
+        | ResponseItem::Compaction { .. } => true,
         ResponseItem::Other => false,
     }
 }

@@ -25,7 +25,10 @@ impl SessionTask for CompactTask {
         _cancellation_token: CancellationToken,
     ) -> Option<String> {
         let session = session.clone_session();
-        if crate::compact::should_use_remote_compact_task(&session) {
+        if crate::compact::should_use_remote_compact_task(
+            session.as_ref(),
+            &ctx.client.get_provider(),
+        ) {
             crate::compact_remote::run_remote_compact_task(session, ctx).await
         } else {
             crate::compact::run_compact_task(session, ctx, input).await

@@ -38,6 +38,7 @@ use core_test_support::responses::sse_failed;
 use core_test_support::responses::start_mock_server;
 use pretty_assertions::assert_eq;
 use serde_json::json;
+use wiremock::MockServer;
 // --- Test helpers -----------------------------------------------------------

 pub(super) const FIRST_REPLY: &str = "FIRST_REPLY";

@@ -100,6 +101,13 @@ fn json_fragment(text: &str) -> String {
         .to_string()
 }

+fn non_openai_model_provider(server: &MockServer) -> ModelProviderInfo {
+    let mut provider = built_in_model_providers()["openai"].clone();
+    provider.name = "OpenAI (test)".into();
+    provider.base_url = Some(format!("{}/v1", server.uri()));
+    provider
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn summarize_context_three_requests_and_instructions() {
     skip_if_no_network!();

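This helper exists because `should_use_remote_compact_task` now keys off the provider rather than the auth mode: renaming the cloned built-in "openai" provider to "OpenAI (test)" makes `is_openai()` return false, pinning the tests below to the local compaction path even though they still point at the mock server.
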
@@ -127,10 +135,7 @@ async fn summarize_context_three_requests_and_instructions() {
     let request_log = mount_sse_sequence(&server, vec![sse1, sse2, sse3]).await;

     // Build config pointing to the mock server and spawn Codex.
-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);
     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);
     config.model_provider = model_provider;

@@ -324,10 +329,7 @@ async fn manual_compact_uses_custom_prompt() {

     let custom_prompt = "Use this compact prompt instead";

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);
     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);
     config.model_provider = model_provider;

@@ -407,10 +409,7 @@ async fn manual_compact_emits_api_and_local_token_usage_events() {
     ]);
     mount_sse_once(&server, sse_compact).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);
     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);
     config.model_provider = model_provider;

@@ -467,7 +466,11 @@ async fn multiple_auto_compact_per_task_runs_after_token_limit_hit() {

     let server = start_mock_server().await;

+    let non_openai_provider_name = non_openai_model_provider(&server).name;
     let codex = test_codex()
+        .with_config(move |config| {
+            config.model_provider.name = non_openai_provider_name;
+        })
         .build(&server)
         .await
         .expect("build codex")

@@ -1050,10 +1053,7 @@ async fn auto_compact_runs_after_token_limit_hit() {
     };
     mount_sse_once_match(&server, fourth_matcher, sse4).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);

     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);

@@ -1299,10 +1299,7 @@ async fn auto_compact_persists_rollout_entries() {
     };
     mount_sse_once_match(&server, third_matcher, sse3).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);

     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);

@@ -1403,10 +1400,7 @@ async fn manual_compact_retries_after_context_window_error() {
     )
     .await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);

     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);

@@ -1539,10 +1533,7 @@ async fn manual_compact_twice_preserves_latest_user_messages() {
     )
     .await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);

     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);

@@ -1743,10 +1734,7 @@ async fn auto_compact_allows_multiple_attempts_when_interleaved_with_other_turn_

     mount_sse_sequence(&server, vec![sse1, sse2, sse3, sse4, sse5, sse6]).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);

     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);

@@ -1856,10 +1844,7 @@ async fn auto_compact_triggers_after_function_call_over_95_percent_usage() {

     // We don't assert on the post-compact request, so no need to keep its mock.
     mount_sse_once(&server, post_auto_compact_turn).await;

-    let model_provider = ModelProviderInfo {
-        base_url: Some(format!("{}/v1", server.uri())),
-        ..built_in_model_providers()["openai"].clone()
-    };
+    let model_provider = non_openai_model_provider(&server);

     let home = TempDir::new().unwrap();
     let mut config = load_default_config_for_test(&home);

@@ -1961,13 +1946,18 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
     )
     .await;

-    let compacted_history = vec![codex_protocol::models::ResponseItem::Message {
-        id: None,
-        role: "assistant".to_string(),
-        content: vec![codex_protocol::models::ContentItem::OutputText {
-            text: "REMOTE_COMPACT_SUMMARY".to_string(),
-        }],
-    }];
+    let compacted_history = vec![
+        codex_protocol::models::ResponseItem::Message {
+            id: None,
+            role: "assistant".to_string(),
+            content: vec![codex_protocol::models::ContentItem::OutputText {
+                text: "REMOTE_COMPACT_SUMMARY".to_string(),
+            }],
+        },
+        codex_protocol::models::ResponseItem::Compaction {
+            encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+        },
+    ];
     let compact_mock =
         mount_compact_json_once(&server, serde_json::json!({ "output": compacted_history })).await;

@@ -2028,4 +2018,8 @@ async fn auto_compact_counts_encrypted_reasoning_before_last_user() {
         resume_body.contains("REMOTE_COMPACT_SUMMARY") || resume_body.contains(FINAL_REPLY),
         "resume request should follow remote compact and use compacted history"
     );
+    assert!(
+        resume_body.contains("ENCRYPTED_COMPACTION_SUMMARY"),
+        "resume request should include compaction summary item"
+    );
 }

@@ -51,13 +51,18 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
     )
     .await;

-    let compacted_history = vec![ResponseItem::Message {
-        id: None,
-        role: "user".to_string(),
-        content: vec![ContentItem::InputText {
-            text: "REMOTE_COMPACTED_SUMMARY".to_string(),
-        }],
-    }];
+    let compacted_history = vec![
+        ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ContentItem::InputText {
+                text: "REMOTE_COMPACTED_SUMMARY".to_string(),
+            }],
+        },
+        ResponseItem::Compaction {
+            encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+        },
+    ];
     let compact_mock = responses::mount_compact_json_once(
         harness.server(),
         serde_json::json!({ "output": compacted_history.clone() }),

@@ -120,6 +125,10 @@ async fn remote_compact_replaces_history_for_followups() -> Result<()> {
         follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"),
         "expected follow-up request to use compacted history"
     );
+    assert!(
+        follow_up_body.contains("ENCRYPTED_COMPACTION_SUMMARY"),
+        "expected follow-up request to include compaction summary item"
+    );
     assert!(
         !follow_up_body.contains("FIRST_REMOTE_REPLY"),
         "expected follow-up request to drop pre-compaction assistant messages"

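Note that beyond the human-readable summary, the tests now also assert that the opaque `Compaction` item (the `ENCRYPTED_COMPACTION_SUMMARY` payload) survives into follow-up requests, i.e. the encrypted compaction state is re-sent verbatim rather than dropped from history.
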
@@ -159,13 +168,18 @@ async fn remote_compact_runs_automatically() -> Result<()> {
     )
     .await;

-    let compacted_history = vec![ResponseItem::Message {
-        id: None,
-        role: "user".to_string(),
-        content: vec![ContentItem::InputText {
-            text: "REMOTE_COMPACTED_SUMMARY".to_string(),
-        }],
-    }];
+    let compacted_history = vec![
+        ResponseItem::Message {
+            id: None,
+            role: "user".to_string(),
+            content: vec![ContentItem::InputText {
+                text: "REMOTE_COMPACTED_SUMMARY".to_string(),
+            }],
+        },
+        ResponseItem::Compaction {
+            encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+        },
+    ];
     let compact_mock = responses::mount_compact_json_once(
         harness.server(),
         serde_json::json!({ "output": compacted_history.clone() }),

@@ -190,6 +204,7 @@ async fn remote_compact_runs_automatically() -> Result<()> {
     assert_eq!(compact_mock.requests().len(), 1);
     let follow_up_body = responses_mock.single_request().body_json().to_string();
     assert!(follow_up_body.contains("REMOTE_COMPACTED_SUMMARY"));
+    assert!(follow_up_body.contains("ENCRYPTED_COMPACTION_SUMMARY"));

     Ok(())
 }

@@ -226,6 +241,9 @@ async fn remote_compact_persists_replacement_history_in_rollout() -> Result<()>
                 text: "COMPACTED_USER_SUMMARY".to_string(),
             }],
         },
+        ResponseItem::Compaction {
+            encrypted_content: "ENCRYPTED_COMPACTION_SUMMARY".to_string(),
+        },
         ResponseItem::Message {
             id: None,
             role: "assistant".to_string(),

@@ -857,6 +857,7 @@ async fn start_test_conversation(
     model: Option<&str>,
 ) -> (TempDir, Config, ConversationManager, Arc<CodexConversation>) {
     let model_provider = ModelProviderInfo {
+        name: "Non-OpenAI Model provider".into(),
         base_url: Some(format!("{}/v1", server.uri())),
         ..built_in_model_providers()["openai"].clone()
     };

@@ -150,8 +150,8 @@ pub enum ResponseItem {
     GhostSnapshot {
         ghost_commit: GhostCommit,
     },
-    #[serde(alias = "compaction")]
-    CompactionSummary {
+    #[serde(alias = "compaction_summary")]
+    Compaction {
         encrypted_content: String,
     },
     #[serde(other)]

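After the rename, `Compaction` should serialize under the snake_case tag `compaction` (assuming the enum's usual snake_case tagging, which the updated alias test below implies), while `compaction_summary` remains accepted as an alias, so payloads written under either spelling still deserialize. A minimal sketch:

    for json in [
        r#"{"type":"compaction","encrypted_content":"abc"}"#,         // canonical tag
        r#"{"type":"compaction_summary","encrypted_content":"abc"}"#, // legacy alias
    ] {
        let item: ResponseItem = serde_json::from_str(json).unwrap();
        assert_eq!(
            item,
            ResponseItem::Compaction { encrypted_content: "abc".into() }
        );
    }
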
@@ -671,13 +671,13 @@ mod tests {

     #[test]
     fn deserializes_compaction_alias() -> Result<()> {
-        let json = r#"{"type":"compaction","encrypted_content":"abc"}"#;
+        let json = r#"{"type":"compaction_summary","encrypted_content":"abc"}"#;

         let item: ResponseItem = serde_json::from_str(json)?;

         assert_eq!(
             item,
-            ResponseItem::CompactionSummary {
+            ResponseItem::Compaction {
                 encrypted_content: "abc".into(),
             }
         );