chore: unify memory job flow (#11334)

This commit is contained in:
jif-oai 2026-02-10 20:26:39 +00:00 committed by GitHub
parent 58a59a2dae
commit a6e9469fa4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
25 changed files with 2455 additions and 3292 deletions

View file

@ -13,7 +13,6 @@ use crate::agent::AgentControl;
use crate::agent::AgentStatus;
use crate::agent::MAX_THREAD_SPAWN_DEPTH;
use crate::agent::agent_status_from_event;
use crate::agent::status::is_final as is_final_agent_status;
use crate::analytics_client::AnalyticsEventsClient;
use crate::analytics_client::build_track_events_context;
use crate::apps::render_apps_section;
@ -111,7 +110,6 @@ use crate::client::ModelClient;
use crate::client::ModelClientSession;
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::client_common::ResponseStream;
use crate::codex_thread::ThreadConfigSnapshot;
use crate::compact::collect_user_messages;
use crate::config::Config;
@ -192,10 +190,8 @@ use crate::protocol::TokenUsage;
use crate::protocol::TokenUsageInfo;
use crate::protocol::TurnDiffEvent;
use crate::protocol::WarningEvent;
use crate::rollout::INTERACTIVE_SESSION_SOURCES;
use crate::rollout::RolloutRecorder;
use crate::rollout::RolloutRecorderParams;
use crate::rollout::list::ThreadSortKey;
use crate::rollout::map_session_init_error;
use crate::rollout::metadata;
use crate::shell;
@ -249,8 +245,6 @@ use codex_protocol::user_input::UserInput;
use codex_utils_readiness::Readiness;
use codex_utils_readiness::ReadinessFlag;
mod memory_startup;
/// The high-level interface to the Codex system.
/// It operates as a queue pair where you send submissions and receive events.
pub struct Codex {
@ -1241,7 +1235,7 @@ impl Session {
// record_initial_history can emit events. We record only after the SessionConfiguredEvent is emitted.
sess.record_initial_history(initial_history).await;
memory_startup::start_memories_startup_task(
memories::start_memories_startup_task(
&sess,
Arc::clone(&config),
&session_configuration.session_source,

File diff suppressed because it is too large. Load diff

View file

@ -0,0 +1,59 @@
use crate::path_utils::normalize_for_path_comparison;
use sha2::Digest;
use sha2::Sha256;
use std::path::Path;
use std::path::PathBuf;
use super::scope::MEMORY_SCOPE_KEY_USER;
/// Directory name for a scope's memory root (last path segment).
pub(super) const MEMORY_SUBDIR: &str = "memory";
/// Subdirectory of the memory root holding stage-1 raw memory files.
pub(super) const RAW_MEMORIES_SUBDIR: &str = "raw_memories";
/// File name of the rebuilt memory summary inside a memory root.
pub(super) const MEMORY_SUMMARY_FILENAME: &str = "memory_summary.md";
/// File name of the memory registry file.
pub(super) const MEMORY_REGISTRY_FILENAME: &str = "MEMORY.md";
/// File name of the legacy consolidated output (kept for cleanup of old layouts).
pub(super) const LEGACY_CONSOLIDATED_FILENAME: &str = "consolidated.md";
/// Subdirectory name for skills artifacts.
pub(super) const SKILLS_SUBDIR: &str = "skills";
/// Number of leading hex characters of the SHA-256 cwd hash used as the bucket name.
const CWD_MEMORY_BUCKET_HEX_LEN: usize = 16;
/// Returns the on-disk memory root directory for a given working directory.
///
/// The cwd is normalized, hashed into a deterministic bucket, and placed
/// under `<codex_home>/memories/<hash>/memory`.
pub(super) fn memory_root_for_cwd(codex_home: &Path, cwd: &Path) -> PathBuf {
    let mut root = codex_home.join("memories");
    root.push(memory_bucket_for_cwd(cwd));
    root.push(MEMORY_SUBDIR);
    root
}
/// Returns the on-disk root directory for user-shared (non-cwd) memories.
pub(super) fn memory_root_for_user(codex_home: &Path) -> PathBuf {
    let mut root = codex_home.to_path_buf();
    for segment in ["memories", MEMORY_SCOPE_KEY_USER, MEMORY_SUBDIR] {
        root.push(segment);
    }
    root
}
/// Directory under `root` where stage-1 raw memory files are stored.
pub(super) fn raw_memories_dir(root: &Path) -> PathBuf {
    let mut dir = root.to_path_buf();
    dir.push(RAW_MEMORIES_SUBDIR);
    dir
}
/// Path of the memory summary file under `root`.
pub(super) fn memory_summary_file(root: &Path) -> PathBuf {
    let mut file = root.to_path_buf();
    file.push(MEMORY_SUMMARY_FILENAME);
    file
}
/// Ensures the phase-1 memory directory layout exists for the given root.
///
/// Creates the raw-memories directory (and all parents) if missing.
pub(super) async fn ensure_layout(root: &Path) -> std::io::Result<()> {
    let raw_dir = raw_memories_dir(root);
    tokio::fs::create_dir_all(raw_dir).await
}
/// Derives the deterministic bucket name for `cwd`: the first
/// `CWD_MEMORY_BUCKET_HEX_LEN` hex characters of a SHA-256 digest over the
/// normalized, lossily-stringified path.
fn memory_bucket_for_cwd(cwd: &Path) -> String {
    let normalized_path = normalize_cwd_for_memory(cwd);
    let path_text = normalized_path.to_string_lossy();
    let mut digest = Sha256::new();
    digest.update(path_text.as_bytes());
    let hex = format!("{:x}", digest.finalize());
    hex[..CWD_MEMORY_BUCKET_HEX_LEN].to_string()
}
/// Normalizes `cwd` for bucket/key derivation; falls back to the raw path if
/// normalization fails.
fn normalize_cwd_for_memory(cwd: &Path) -> PathBuf {
    match normalize_for_path_comparison(cwd) {
        Ok(normalized) => normalized,
        Err(_) => cwd.to_path_buf(),
    }
}

View file

@ -1,109 +1,46 @@
mod phase_one;
//! Memory subsystem for startup extraction and consolidation.
//!
//! The startup memory pipeline is split into two phases:
//! - Phase 1: select rollouts, extract stage-1 raw memories, persist stage-1 outputs, and enqueue consolidation.
//! - Phase 2: claim scopes, materialize consolidation inputs, and dispatch consolidation agents.
mod layout;
mod prompts;
mod rollout;
mod selection;
mod scope;
mod stage_one;
mod startup;
mod storage;
mod text;
mod types;
#[cfg(test)]
mod tests;
use crate::path_utils::normalize_for_path_comparison;
use sha2::Digest;
use sha2::Sha256;
use std::path::Path;
use std::path::PathBuf;
/// Subagent source label used to identify consolidation tasks.
pub(crate) const MEMORY_CONSOLIDATION_SUBAGENT_LABEL: &str = "memory_consolidation";
const MEMORY_CONSOLIDATION_SUBAGENT_LABEL: &str = "memory_consolidation";
/// Maximum number of rollout candidates processed per startup pass.
pub(crate) const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
const MAX_ROLLOUTS_PER_STARTUP: usize = 64;
/// Concurrency cap for startup memory extraction and consolidation scheduling.
pub(crate) const PHASE_ONE_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
const PHASE_ONE_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
/// Concurrency cap for phase-2 consolidation dispatch.
const PHASE_TWO_CONCURRENCY_LIMIT: usize = MAX_ROLLOUTS_PER_STARTUP;
/// Maximum number of recent raw memories retained per scope.
pub(crate) const MAX_RAW_MEMORIES_PER_SCOPE: usize = 64;
const MAX_RAW_MEMORIES_PER_SCOPE: usize = 64;
/// Maximum rollout age considered for phase-1 extraction.
pub(crate) const PHASE_ONE_MAX_ROLLOUT_AGE_DAYS: i64 = 30;
const PHASE_ONE_MAX_ROLLOUT_AGE_DAYS: i64 = 30;
/// Lease duration (seconds) for phase-1 job ownership.
pub(crate) const PHASE_ONE_JOB_LEASE_SECONDS: i64 = 3_600;
const PHASE_ONE_JOB_LEASE_SECONDS: i64 = 3_600;
/// Backoff delay (seconds) before retrying a failed stage-1 extraction job.
const PHASE_ONE_JOB_RETRY_DELAY_SECONDS: i64 = 3_600;
/// Lease duration (seconds) for phase-2 consolidation job ownership.
pub(crate) const PHASE_TWO_JOB_LEASE_SECONDS: i64 = 3_600;
const PHASE_TWO_JOB_LEASE_SECONDS: i64 = 3_600;
/// Backoff delay (seconds) before retrying a failed phase-2 consolidation job.
const PHASE_TWO_JOB_RETRY_DELAY_SECONDS: i64 = 3_600;
/// Heartbeat interval (seconds) for phase-2 running jobs.
pub(crate) const PHASE_TWO_JOB_HEARTBEAT_SECONDS: u64 = 30;
pub(crate) const MEMORY_SCOPE_KIND_CWD: &str = "cwd";
pub(crate) const MEMORY_SCOPE_KIND_USER: &str = "user";
pub(crate) const MEMORY_SCOPE_KEY_USER: &str = "user";
const PHASE_TWO_JOB_HEARTBEAT_SECONDS: u64 = 30;
const MEMORY_SUBDIR: &str = "memory";
const RAW_MEMORIES_SUBDIR: &str = "raw_memories";
const MEMORY_SUMMARY_FILENAME: &str = "memory_summary.md";
const MEMORY_REGISTRY_FILENAME: &str = "MEMORY.md";
const LEGACY_CONSOLIDATED_FILENAME: &str = "consolidated.md";
const SKILLS_SUBDIR: &str = "skills";
const CWD_MEMORY_BUCKET_HEX_LEN: usize = 16;
pub(crate) use phase_one::RAW_MEMORY_PROMPT;
pub(crate) use phase_one::parse_stage_one_output;
pub(crate) use phase_one::stage_one_output_schema;
pub(crate) use prompts::build_consolidation_prompt;
pub(crate) use prompts::build_stage_one_input_message;
#[cfg(test)]
pub(crate) use rollout::StageOneResponseItemKinds;
pub(crate) use rollout::StageOneRolloutFilter;
pub(crate) use rollout::serialize_filtered_rollout_response_items;
pub(crate) use selection::select_rollout_candidates_from_db;
pub(crate) use storage::prune_to_recent_memories_and_rebuild_summary;
pub(crate) use storage::rebuild_memory_summary_from_memories;
pub(crate) use storage::sync_raw_memories_from_memories;
pub(crate) use storage::wipe_consolidation_outputs;
pub(crate) use types::RolloutCandidate;
/// Returns the on-disk memory root directory for a given working directory.
/// Starts the memory startup pipeline for eligible root sessions.
///
/// The cwd is normalized and hashed into a deterministic bucket under
/// `<codex_home>/memories/<hash>/memory`.
pub(crate) fn memory_root_for_cwd(codex_home: &Path, cwd: &Path) -> PathBuf {
let bucket = memory_bucket_for_cwd(cwd);
codex_home.join("memories").join(bucket).join(MEMORY_SUBDIR)
}
/// Returns the DB scope key for a cwd-scoped memory entry.
///
/// Uses the same normalization/fallback behavior as cwd bucket derivation.
pub(crate) fn memory_scope_key_for_cwd(cwd: &Path) -> String {
    let normalized = normalize_cwd_for_memory(cwd);
    format!("{}", normalized.display())
}
/// Returns the on-disk user-shared memory root directory.
pub(crate) fn memory_root_for_user(codex_home: &Path) -> PathBuf {
codex_home
.join("memories")
.join(MEMORY_SCOPE_KEY_USER)
.join(MEMORY_SUBDIR)
}
fn raw_memories_dir(root: &Path) -> PathBuf {
root.join(RAW_MEMORIES_SUBDIR)
}
fn memory_summary_file(root: &Path) -> PathBuf {
root.join(MEMORY_SUMMARY_FILENAME)
}
/// Ensures the phase-1 memory directory layout exists for the given root.
pub(crate) async fn ensure_layout(root: &Path) -> std::io::Result<()> {
tokio::fs::create_dir_all(raw_memories_dir(root)).await
}
fn memory_bucket_for_cwd(cwd: &Path) -> String {
let normalized = normalize_cwd_for_memory(cwd);
let normalized = normalized.to_string_lossy();
let mut hasher = Sha256::new();
hasher.update(normalized.as_bytes());
let full_hash = format!("{:x}", hasher.finalize());
full_hash[..CWD_MEMORY_BUCKET_HEX_LEN].to_string()
}
fn normalize_cwd_for_memory(cwd: &Path) -> PathBuf {
normalize_for_path_comparison(cwd).unwrap_or_else(|_| cwd.to_path_buf())
}
/// This is the single entrypoint that `codex` uses to trigger memory startup.
pub(crate) use startup::start_memories_startup_task;

View file

@ -2,6 +2,9 @@ use askama::Template;
use std::path::Path;
use tracing::warn;
use super::text::prefix_at_char_boundary;
use super::text::suffix_at_char_boundary;
const MAX_ROLLOUT_BYTES_FOR_PROMPT: usize = 1_000_000;
#[derive(Template)]
@ -20,7 +23,7 @@ struct StageOneInputTemplate<'a> {
/// Builds the consolidation subagent prompt for a specific memory root.
///
/// Falls back to a simple string replacement if Askama rendering fails.
pub(crate) fn build_consolidation_prompt(memory_root: &Path) -> String {
pub(super) fn build_consolidation_prompt(memory_root: &Path) -> String {
let memory_root = memory_root.display().to_string();
let template = ConsolidationPromptTemplate {
memory_root: &memory_root,
@ -39,7 +42,7 @@ pub(crate) fn build_consolidation_prompt(memory_root: &Path) -> String {
///
/// Large rollout payloads are truncated to a bounded byte budget while keeping
/// both head and tail context.
pub(crate) fn build_stage_one_input_message(rollout_path: &Path, rollout_contents: &str) -> String {
pub(super) fn build_stage_one_input_message(rollout_path: &Path, rollout_contents: &str) -> String {
let (rollout_contents, truncated) = truncate_rollout_for_prompt(rollout_contents);
if truncated {
warn!(
@ -82,35 +85,6 @@ fn truncate_rollout_for_prompt(input: &str) -> (String, bool) {
(truncated, true)
}
/// Returns the longest prefix of `input` that fits within `max_bytes` bytes
/// without splitting a UTF-8 character.
fn prefix_at_char_boundary(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Back up from the byte budget to the nearest char boundary.
    // `is_char_boundary(0)` is always true, so this cannot underflow.
    let mut end = max_bytes;
    while !input.is_char_boundary(end) {
        end -= 1;
    }
    &input[..end]
}
/// Returns the longest suffix of `input` that fits within `max_bytes` bytes
/// without splitting a UTF-8 character.
fn suffix_at_char_boundary(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Move the start forward to the nearest char boundary.
    // `is_char_boundary(len)` is always true, so this cannot overrun.
    let mut start = input.len() - max_bytes;
    while !input.is_char_boundary(start) {
        start += 1;
    }
    &input[start..]
}
#[cfg(test)]
mod tests {
use super::*;

View file

@ -5,7 +5,7 @@ use codex_protocol::protocol::RolloutItem;
/// Bitmask selector for `ResponseItem` variants retained from rollout JSONL.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct StageOneResponseItemKinds(u16);
pub(super) struct StageOneResponseItemKinds(u16);
impl StageOneResponseItemKinds {
const MESSAGE: u16 = 1 << 0;
@ -20,7 +20,7 @@ impl StageOneResponseItemKinds {
const COMPACTION: u16 = 1 << 9;
const OTHER: u16 = 1 << 10;
pub(crate) const fn all() -> Self {
pub(super) const fn all() -> Self {
Self(
Self::MESSAGE
| Self::REASONING
@ -37,7 +37,7 @@ impl StageOneResponseItemKinds {
}
#[cfg(test)]
pub(crate) const fn messages_only() -> Self {
pub(super) const fn messages_only() -> Self {
Self(Self::MESSAGE)
}
@ -72,19 +72,19 @@ impl Default for StageOneResponseItemKinds {
/// Controls which rollout item kinds are retained for stage-1 memory extraction.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct StageOneRolloutFilter {
pub(super) struct StageOneRolloutFilter {
/// Keep `RolloutItem::ResponseItem` entries.
pub(crate) keep_response_items: bool,
pub(super) keep_response_items: bool,
/// Keep `RolloutItem::Compacted` entries (converted to assistant messages).
pub(crate) keep_compacted_items: bool,
pub(super) keep_compacted_items: bool,
/// Restricts kept `ResponseItem` entries by variant.
pub(crate) response_item_kinds: StageOneResponseItemKinds,
pub(super) response_item_kinds: StageOneResponseItemKinds,
/// Optional cap on retained items after filtering.
pub(crate) max_items: Option<usize>,
pub(super) max_items: Option<usize>,
}
impl StageOneRolloutFilter {
pub(crate) const fn response_and_compacted_items() -> Self {
pub(super) const fn response_and_compacted_items() -> Self {
Self {
keep_response_items: true,
keep_compacted_items: true,
@ -104,7 +104,7 @@ impl Default for StageOneRolloutFilter {
///
/// `RolloutItem::Compacted` entries are converted to assistant messages so the
/// model sees the same response-item shape as normal transcript content.
pub(crate) fn filter_rollout_response_items(
pub(super) fn filter_rollout_response_items(
items: &[RolloutItem],
filter: StageOneRolloutFilter,
) -> Vec<ResponseItem> {
@ -139,7 +139,7 @@ pub(crate) fn filter_rollout_response_items(
}
/// Serializes filtered stage-1 memory items for prompt inclusion.
pub(crate) fn serialize_filtered_rollout_response_items(
pub(super) fn serialize_filtered_rollout_response_items(
items: &[RolloutItem],
filter: StageOneRolloutFilter,
) -> Result<String> {

View file

@ -0,0 +1,3 @@
/// Scope kind for memories tied to a specific working directory.
pub(super) const MEMORY_SCOPE_KIND_CWD: &str = "cwd";
/// Scope kind for memories shared across the whole user.
pub(super) const MEMORY_SCOPE_KIND_USER: &str = "user";
/// The only valid scope key for user-kind scopes.
pub(super) const MEMORY_SCOPE_KEY_USER: &str = "user";

View file

@ -1,47 +0,0 @@
use chrono::Duration;
use chrono::Utc;
use codex_protocol::ThreadId;
use codex_state::ThreadMetadata;
use super::types::RolloutCandidate;
/// Selects rollout candidates that need stage-1 memory extraction.
///
/// A rollout qualifies when it is not the active thread and its last update
/// falls within the configured max-age window; at most `max_items` are
/// returned, in input order.
pub(crate) fn select_rollout_candidates_from_db(
    items: &[ThreadMetadata],
    current_thread_id: ThreadId,
    max_items: usize,
    max_age_days: i64,
) -> Vec<RolloutCandidate> {
    // Clamp negative ages so the cutoff never lies in the future.
    let cutoff = Utc::now() - Duration::days(max_age_days.max(0));
    items
        .iter()
        .filter(|item| item.id != current_thread_id && item.updated_at >= cutoff)
        .take(max_items)
        .map(|item| RolloutCandidate {
            thread_id: item.id,
            rollout_path: item.rollout_path.clone(),
            cwd: item.cwd.clone(),
            updated_at: Some(item.updated_at.to_rfc3339()),
        })
        .collect()
}

View file

@ -5,10 +5,12 @@ use regex::Regex;
use serde_json::Value;
use serde_json::json;
use super::text::compact_whitespace;
use super::text::truncate_text_for_storage;
use super::types::StageOneOutput;
/// System prompt for stage-1 raw memory extraction.
pub(crate) const RAW_MEMORY_PROMPT: &str =
pub(super) const RAW_MEMORY_PROMPT: &str =
include_str!("../../templates/memories/stage_one_system.md");
const MAX_STAGE_ONE_RAW_MEMORY_CHARS: usize = 300_000;
const MAX_STAGE_ONE_SUMMARY_CHARS: usize = 1_200;
@ -22,7 +24,7 @@ static SECRET_ASSIGNMENT_REGEX: Lazy<Regex> = Lazy::new(|| {
});
/// JSON schema used to constrain stage-1 model output.
pub(crate) fn stage_one_output_schema() -> Value {
pub(super) fn stage_one_output_schema() -> Value {
json!({
"type": "object",
"properties": {
@ -38,7 +40,7 @@ pub(crate) fn stage_one_output_schema() -> Value {
///
/// Accepts plain JSON objects, fenced JSON, and object snippets embedded in
/// extra text, then enforces redaction and size limits.
pub(crate) fn parse_stage_one_output(raw: &str) -> Result<StageOneOutput> {
pub(super) fn parse_stage_one_output(raw: &str) -> Result<StageOneOutput> {
let parsed = parse_json_object_loose(raw)?;
let output: StageOneOutput = serde_json::from_value(parsed).map_err(|err| {
CodexErr::InvalidRequest(format!("invalid stage-1 memory output JSON payload: {err}"))
@ -91,35 +93,6 @@ fn parse_json_object_loose(raw: &str) -> Result<Value> {
))
}
/// Longest prefix of `input` whose byte length is at most `max_bytes`,
/// cut only on a UTF-8 char boundary.
fn prefix_at_char_boundary(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Walk back from the budget until we land on a char boundary; index 0 is
    // always a boundary, so the loop terminates.
    let mut cut = max_bytes;
    while !input.is_char_boundary(cut) {
        cut -= 1;
    }
    &input[..cut]
}
/// Longest suffix of `input` whose byte length is at most `max_bytes`,
/// cut only on a UTF-8 char boundary.
fn suffix_at_char_boundary(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Walk forward from the tentative start until we land on a char boundary;
    // index len is always a boundary, so the loop terminates.
    let mut cut = input.len() - max_bytes;
    while !input.is_char_boundary(cut) {
        cut += 1;
    }
    &input[cut..]
}
fn normalize_stage_one_output(mut output: StageOneOutput) -> Result<StageOneOutput> {
output.raw_memory = output.raw_memory.trim().to_string();
output.summary = output.summary.trim().to_string();
@ -157,10 +130,6 @@ fn normalize_stage_one_output(mut output: StageOneOutput) -> Result<StageOneOutp
Ok(output)
}
/// Collapses every run of whitespace in `input` to a single space and trims
/// leading/trailing whitespace.
fn compact_whitespace(input: &str) -> String {
    let mut compacted = String::with_capacity(input.len());
    for word in input.split_whitespace() {
        if !compacted.is_empty() {
            compacted.push(' ');
        }
        compacted.push_str(word);
    }
    compacted
}
fn redact_secrets(input: &str) -> String {
let redacted = OPENAI_KEY_REGEX.replace_all(input, "[REDACTED_SECRET]");
let redacted = AWS_ACCESS_KEY_ID_REGEX.replace_all(&redacted, "[REDACTED_SECRET]");
@ -204,20 +173,6 @@ fn has_raw_memory_structure(input: &str) -> bool {
&& trimmed.contains("Outcome:")
}
/// Bounds `input` for storage: if it exceeds `max_bytes`, keeps head and tail
/// context joined by the elision `marker`; otherwise returns it unchanged.
fn truncate_text_for_storage(input: &str, max_bytes: usize, marker: &str) -> String {
    if input.len() <= max_bytes {
        return input.to_string();
    }
    // Split the byte budget (minus the marker) roughly in half between the
    // head and the tail, giving the tail any odd leftover byte.
    let content_budget = max_bytes.saturating_sub(marker.len());
    let head_budget = content_budget / 2;
    let tail_budget = content_budget - head_budget;
    let head = prefix_at_char_boundary(input, head_budget);
    let tail = suffix_at_char_boundary(input, tail_budget);
    format!("{head}{marker}{tail}")
}
fn compile_regex(pattern: &str) -> Regex {
match Regex::new(pattern) {
Ok(regex) => regex,

View file

@ -0,0 +1,221 @@
use crate::codex::Session;
use crate::config::Config;
use codex_protocol::protocol::SessionSource;
use codex_protocol::protocol::SubAgentSource;
use codex_protocol::user_input::UserInput;
use std::sync::Arc;
use tracing::debug;
use tracing::info;
use tracing::warn;
use super::super::MAX_RAW_MEMORIES_PER_SCOPE;
use super::super::MEMORY_CONSOLIDATION_SUBAGENT_LABEL;
use super::super::PHASE_TWO_JOB_LEASE_SECONDS;
use super::super::PHASE_TWO_JOB_RETRY_DELAY_SECONDS;
use super::super::prompts::build_consolidation_prompt;
use super::super::storage::rebuild_memory_summary_from_memories;
use super::super::storage::sync_raw_memories_from_memories;
use super::super::storage::wipe_consolidation_outputs;
use super::MemoryScopeTarget;
use super::watch::spawn_phase2_completion_task;
/// Runs one phase-2 consolidation attempt for a single memory scope.
///
/// Flow: claim the scope's phase-2 job via the state DB, load the latest
/// stage-1 outputs, materialize them on disk under `scope.memory_root`, wipe
/// prior consolidation outputs, then spawn a consolidation subagent and hand
/// completion tracking to `spawn_phase2_completion_task`. Every failure after
/// a successful claim marks the job failed with a retry delay.
pub(super) async fn run_memory_consolidation_for_scope(
    session: Arc<Session>,
    config: Arc<Config>,
    scope: MemoryScopeTarget,
) {
    // Job ownership is coordinated through the state DB; without it nothing
    // can be claimed safely.
    let Some(state_db) = session.services.state_db.as_deref() else {
        warn!(
            "state db unavailable for scope {}:{}; skipping consolidation",
            scope.scope_kind, scope.scope_key
        );
        return;
    };
    // Attempt a time-leased claim on this scope's consolidation job.
    let claim = match state_db
        .try_claim_phase2_job(
            scope.scope_kind,
            &scope.scope_key,
            session.conversation_id,
            PHASE_TWO_JOB_LEASE_SECONDS,
        )
        .await
    {
        Ok(claim) => claim,
        Err(err) => {
            warn!(
                "state db try_claim_phase2_job failed for scope {}:{}: {err}",
                scope.scope_kind, scope.scope_key
            );
            return;
        }
    };
    // Proceed only when we own the claim; not-dirty or already-running scopes
    // are skipped quietly.
    let (ownership_token, claimed_watermark) = match claim {
        codex_state::Phase2JobClaimOutcome::Claimed {
            ownership_token,
            input_watermark,
        } => (ownership_token, input_watermark),
        codex_state::Phase2JobClaimOutcome::SkippedNotDirty => {
            debug!(
                "memory phase-2 scope not pending (or already up to date); skipping consolidation: {}:{}",
                scope.scope_kind, scope.scope_key
            );
            return;
        }
        codex_state::Phase2JobClaimOutcome::SkippedRunning => {
            debug!(
                "memory phase-2 job already running for scope {}:{}; skipping",
                scope.scope_kind, scope.scope_key
            );
            return;
        }
    };
    // Load the most recent stage-1 outputs consolidation should operate on.
    let latest_memories = match state_db
        .list_stage1_outputs_for_scope(
            scope.scope_kind,
            &scope.scope_key,
            MAX_RAW_MEMORIES_PER_SCOPE,
        )
        .await
    {
        Ok(memories) => memories,
        Err(err) => {
            warn!(
                "state db list_stage1_outputs_for_scope failed during consolidation for scope {}:{}: {err}",
                scope.scope_kind, scope.scope_key
            );
            let _ = state_db
                .mark_phase2_job_failed(
                    scope.scope_kind,
                    &scope.scope_key,
                    &ownership_token,
                    "failed to read scope stage-1 outputs before consolidation",
                    PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
                )
                .await;
            return;
        }
    };
    // Nothing to consolidate: record success at the claimed watermark so the
    // scope stops being treated as pending.
    if latest_memories.is_empty() {
        debug!(
            "memory phase-2 scope has no stage-1 outputs; skipping consolidation: {}:{}",
            scope.scope_kind, scope.scope_key
        );
        let _ = state_db
            .mark_phase2_job_succeeded(
                scope.scope_kind,
                &scope.scope_key,
                &ownership_token,
                claimed_watermark,
            )
            .await;
        return;
    };
    // Watermark for what is actually materialized on disk: the newest source
    // timestamp among the loaded memories (falls back to the claimed value).
    let materialized_watermark = latest_memories
        .iter()
        .map(|memory| memory.source_updated_at.timestamp())
        .max()
        .unwrap_or(claimed_watermark);
    // Mirror the stage-1 raw memories into the scope's on-disk memory root.
    if let Err(err) = sync_raw_memories_from_memories(&scope.memory_root, &latest_memories).await {
        warn!(
            "failed syncing phase-1 raw memories for scope {}:{}: {err}",
            scope.scope_kind, scope.scope_key
        );
        let _ = state_db
            .mark_phase2_job_failed(
                scope.scope_kind,
                &scope.scope_key,
                &ownership_token,
                "failed syncing phase-1 raw memories",
                PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
            )
            .await;
        return;
    }
    // Rebuild the summary file from the same memory set.
    if let Err(err) =
        rebuild_memory_summary_from_memories(&scope.memory_root, &latest_memories).await
    {
        warn!(
            "failed rebuilding memory summary for scope {}:{}: {err}",
            scope.scope_kind, scope.scope_key
        );
        let _ = state_db
            .mark_phase2_job_failed(
                scope.scope_kind,
                &scope.scope_key,
                &ownership_token,
                "failed rebuilding memory summary",
                PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
            )
            .await;
        return;
    }
    // Remove outputs left over from any previous consolidation run so the
    // agent starts from a clean slate.
    if let Err(err) = wipe_consolidation_outputs(&scope.memory_root).await {
        warn!(
            "failed to wipe previous consolidation outputs for scope {}:{}: {err}",
            scope.scope_kind, scope.scope_key
        );
        let _ = state_db
            .mark_phase2_job_failed(
                scope.scope_kind,
                &scope.scope_key,
                &ownership_token,
                "failed to wipe previous consolidation outputs",
                PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
            )
            .await;
        return;
    }
    // Spawn the consolidation subagent with its cwd pinned to the memory root
    // and a source label identifying consolidation work.
    let prompt = build_consolidation_prompt(&scope.memory_root);
    let input = vec![UserInput::Text {
        text: prompt,
        text_elements: vec![],
    }];
    let mut consolidation_config = config.as_ref().clone();
    consolidation_config.cwd = scope.memory_root.clone();
    let source = SessionSource::SubAgent(SubAgentSource::Other(
        MEMORY_CONSOLIDATION_SUBAGENT_LABEL.to_string(),
    ));
    match session
        .services
        .agent_control
        .spawn_agent(consolidation_config, input, Some(source))
        .await
    {
        Ok(consolidation_agent_id) => {
            info!(
                "memory phase-2 consolidation agent started: scope={} scope_key={} agent_id={}",
                scope.scope_kind, scope.scope_key, consolidation_agent_id
            );
            // Completion (and the watermark commit) is tracked by a detached
            // watcher task; this function returns immediately.
            spawn_phase2_completion_task(
                session.as_ref(),
                scope,
                ownership_token,
                materialized_watermark,
                consolidation_agent_id,
            );
        }
        Err(err) => {
            warn!(
                "failed to spawn memory consolidation agent for scope {}:{}: {err}",
                scope.scope_kind, scope.scope_key
            );
            let _ = state_db
                .mark_phase2_job_failed(
                    scope.scope_kind,
                    &scope.scope_key,
                    &ownership_token,
                    "failed to spawn consolidation agent",
                    PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
                )
                .await;
        }
    }
}

View file

@ -0,0 +1,150 @@
use crate::client_common::Prompt;
use crate::client_common::ResponseEvent;
use crate::client_common::ResponseStream;
use crate::codex::Session;
use crate::error::CodexErr;
use crate::error::Result as CodexResult;
use crate::rollout::RolloutRecorder;
use codex_protocol::models::BaseInstructions;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use futures::StreamExt;
use tracing::warn;
use super::StageOneRequestContext;
use crate::memories::prompts::build_stage_one_input_message;
use crate::memories::rollout::StageOneRolloutFilter;
use crate::memories::rollout::serialize_filtered_rollout_response_items;
use crate::memories::stage_one::RAW_MEMORY_PROMPT;
use crate::memories::stage_one::parse_stage_one_output;
use crate::memories::stage_one::stage_one_output_schema;
use crate::memories::types::StageOneOutput;
use std::path::Path;
/// Extracts a stage-1 memory output from a single rollout file.
///
/// Steps: load and filter the rollout's items, build the stage-1 prompt,
/// stream one model request using the settings captured in
/// `stage_one_context`, and parse the response into a `StageOneOutput`.
///
/// On failure returns a short static reason string (used by the caller as the
/// job-failure reason); details are logged via `warn!` here.
pub(super) async fn extract_stage_one_output(
    session: &Session,
    rollout_path: &Path,
    stage_one_context: &StageOneRequestContext,
) -> Result<StageOneOutput, &'static str> {
    // Load the rollout items from disk; a completely unreadable rollout aborts.
    let (rollout_items, _thread_id, parse_errors) =
        match RolloutRecorder::load_rollout_items(rollout_path).await {
            Ok(result) => result,
            Err(err) => {
                warn!(
                    "failed to load rollout {} for memories: {err}",
                    rollout_path.display()
                );
                return Err("failed to load rollout");
            }
        };
    // Partial parse errors are tolerated but surfaced in the logs.
    if parse_errors > 0 {
        warn!(
            "rollout {} had {parse_errors} parse errors while preparing stage-1 memory input",
            rollout_path.display()
        );
    }
    // Serialize the (filtered) rollout into the textual payload the stage-1
    // prompt embeds.
    let rollout_contents = match serialize_filtered_rollout_response_items(
        &rollout_items,
        StageOneRolloutFilter::default(),
    ) {
        Ok(contents) => contents,
        Err(err) => {
            warn!(
                "failed to prepare filtered rollout payload {} for memories: {err}",
                rollout_path.display()
            );
            return Err("failed to serialize filtered rollout");
        }
    };
    // Single-turn prompt: stage-1 system instructions, the rollout as one user
    // message, no tools, and a JSON output schema constraining the response.
    let prompt = Prompt {
        input: vec![ResponseItem::Message {
            id: None,
            role: "user".to_string(),
            content: vec![ContentItem::InputText {
                text: build_stage_one_input_message(rollout_path, &rollout_contents),
            }],
            end_turn: None,
            phase: None,
        }],
        tools: Vec::new(),
        parallel_tool_calls: false,
        base_instructions: BaseInstructions {
            text: RAW_MEMORY_PROMPT.to_string(),
        },
        personality: None,
        output_schema: Some(stage_one_output_schema()),
    };
    // Issue the request on a fresh model-client session with the snapshotted
    // per-request settings.
    let mut client_session = session.services.model_client.new_session();
    let mut stream = match client_session
        .stream(
            &prompt,
            &stage_one_context.model_info,
            &stage_one_context.otel_manager,
            stage_one_context.reasoning_effort,
            stage_one_context.reasoning_summary,
            stage_one_context.turn_metadata_header.as_deref(),
        )
        .await
    {
        Ok(stream) => stream,
        Err(err) => {
            warn!(
                "stage-1 memory request failed for rollout {}: {err}",
                rollout_path.display()
            );
            return Err("stage-1 memory request failed");
        }
    };
    // Accumulate the response text until the stream reports completion.
    let output_text = match collect_response_text_until_completed(&mut stream).await {
        Ok(text) => text,
        Err(err) => {
            warn!(
                "failed while waiting for stage-1 memory response for rollout {}: {err}",
                rollout_path.display()
            );
            return Err("stage-1 memory response stream failed");
        }
    };
    // Parse and validate the model output into the structured stage-1 result.
    match parse_stage_one_output(&output_text) {
        Ok(output) => Ok(output),
        Err(err) => {
            warn!(
                "invalid stage-1 memory payload for rollout {}: {err}",
                rollout_path.display()
            );
            Err("invalid stage-1 memory payload")
        }
    }
}
/// Drains `stream`, accumulating output text until the `Completed` event.
///
/// Errors if the stream ends (or yields an error) before completion.
async fn collect_response_text_until_completed(stream: &mut ResponseStream) -> CodexResult<String> {
    let mut collected = String::new();
    while let Some(event) = stream.next().await {
        match event? {
            ResponseEvent::OutputTextDelta(delta) => collected.push_str(&delta),
            ResponseEvent::OutputItemDone(item) => {
                // If no text deltas were seen, fall back to the text content
                // of the finished message item.
                if collected.is_empty()
                    && let ResponseItem::Message { content, .. } = item
                    && let Some(text) = crate::compact::content_items_to_text(&content)
                {
                    collected.push_str(&text);
                }
            }
            ResponseEvent::Completed { .. } => return Ok(collected),
            _ => {}
        }
    }
    Err(CodexErr::Stream(
        "stream closed before response.completed".to_string(),
        None,
    ))
}

View file

@ -0,0 +1,352 @@
mod dispatch;
mod extract;
mod watch;
use crate::codex::Session;
use crate::codex::TurnContext;
use crate::config::Config;
use crate::error::Result as CodexResult;
use crate::features::Feature;
use crate::memories::layout::memory_root_for_cwd;
use crate::memories::layout::memory_root_for_user;
use crate::memories::scope::MEMORY_SCOPE_KEY_USER;
use crate::memories::scope::MEMORY_SCOPE_KIND_CWD;
use crate::memories::scope::MEMORY_SCOPE_KIND_USER;
use crate::rollout::INTERACTIVE_SESSION_SOURCES;
use codex_otel::OtelManager;
use codex_protocol::config_types::ReasoningSummary as ReasoningSummaryConfig;
use codex_protocol::openai_models::ModelInfo;
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
use codex_protocol::protocol::SessionSource;
use futures::StreamExt;
use serde_json::Value;
use std::path::PathBuf;
use std::sync::Arc;
use tracing::info;
use tracing::warn;
pub(super) const PHASE_ONE_THREAD_SCAN_LIMIT: usize = 5_000;
/// Per-request model settings snapshotted once from the session's turn
/// context and cloned into each concurrent stage-1 extraction task.
#[derive(Clone)]
struct StageOneRequestContext {
    // Model selection/capabilities used for the stage-1 request.
    model_info: ModelInfo,
    // Telemetry manager forwarded to the model client.
    otel_manager: OtelManager,
    // Optional reasoning effort override for the request.
    reasoning_effort: Option<ReasoningEffortConfig>,
    // Reasoning summary mode for the request.
    reasoning_summary: ReasoningSummaryConfig,
    // Optional pre-resolved turn metadata header value.
    turn_metadata_header: Option<String>,
}

impl StageOneRequestContext {
    /// Snapshots the fields needed for stage-1 model calls from `turn_context`.
    fn from_turn_context(turn_context: &TurnContext, turn_metadata_header: Option<String>) -> Self {
        Self {
            model_info: turn_context.model_info.clone(),
            otel_manager: turn_context.otel_manager.clone(),
            reasoning_effort: turn_context.reasoning_effort,
            reasoning_summary: turn_context.reasoning_summary,
            turn_metadata_header,
        }
    }
}
/// Canonical memory scope metadata used by both startup phases.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(super) struct MemoryScopeTarget {
    /// Scope family used for DB ownership and dirty-state tracking
    /// (`MEMORY_SCOPE_KIND_CWD` or `MEMORY_SCOPE_KIND_USER`).
    pub(super) scope_kind: &'static str,
    /// Scope identifier used for DB keys.
    pub(super) scope_key: String,
    /// On-disk root where phase-1 artifacts and phase-2 outputs live.
    pub(super) memory_root: PathBuf,
}
/// Converts a pending scope consolidation row into a concrete filesystem target for phase 2.
///
/// Unsupported scope kinds or malformed user-scope keys are ignored.
pub(super) fn memory_scope_target_for_pending_scope(
config: &Config,
pending_scope: codex_state::PendingScopeConsolidation,
) -> Option<MemoryScopeTarget> {
let scope_kind = pending_scope.scope_kind;
let scope_key = pending_scope.scope_key;
match scope_kind.as_str() {
MEMORY_SCOPE_KIND_CWD => {
let cwd = PathBuf::from(&scope_key);
Some(MemoryScopeTarget {
scope_kind: MEMORY_SCOPE_KIND_CWD,
scope_key,
memory_root: memory_root_for_cwd(&config.codex_home, &cwd),
})
}
MEMORY_SCOPE_KIND_USER => {
if scope_key != MEMORY_SCOPE_KEY_USER {
warn!(
"skipping unsupported user memory scope key for phase-2: {}:{}",
scope_kind, scope_key
);
return None;
}
Some(MemoryScopeTarget {
scope_kind: MEMORY_SCOPE_KIND_USER,
scope_key,
memory_root: memory_root_for_user(&config.codex_home),
})
}
_ => {
warn!(
"skipping unsupported memory scope for phase-2 consolidation: {}:{}",
scope_kind, scope_key
);
None
}
}
}
/// Starts the asynchronous startup memory pipeline for an eligible root session.
///
/// No-op for ephemeral sessions, sessions without the memory feature enabled,
/// and subagent sessions.
pub(crate) fn start_memories_startup_task(
    session: &Arc<Session>,
    config: Arc<Config>,
    source: &SessionSource,
) {
    if config.ephemeral
        || !config.features.enabled(Feature::MemoryTool)
        || matches!(source, SessionSource::SubAgent(_))
    {
        return;
    }
    // Hold only a weak reference so the background task never keeps a
    // torn-down session alive.
    let weak_session = Arc::downgrade(session);
    tokio::spawn(async move {
        if let Some(session) = weak_session.upgrade() {
            if let Err(err) = run_memories_startup_pipeline(&session, config).await {
                warn!("memories startup pipeline failed: {err}");
            }
        }
    });
}
/// Runs the startup memory pipeline.
///
/// Phase 1 selects rollout candidates, performs stage-1 extraction requests in
/// parallel, persists stage-1 outputs, and enqueues consolidation work.
///
/// Phase 2 claims pending scopes and spawns consolidation agents.
pub(super) async fn run_memories_startup_pipeline(
    session: &Arc<Session>,
    config: Arc<Config>,
) -> CodexResult<()> {
    // Without a state DB there is nothing to claim or persist; exit quietly.
    let Some(state_db) = session.services.state_db.as_deref() else {
        warn!("state db unavailable for memories startup pipeline; skipping");
        return Ok(());
    };
    // Serialize the interactive session-source values into the string form the
    // claim query filters on. NOTE(review): serialization failures degrade to
    // an empty string, which should match no real source — confirm intended.
    let allowed_sources = INTERACTIVE_SESSION_SOURCES
        .iter()
        .map(|value| match serde_json::to_value(value) {
            Ok(Value::String(s)) => s,
            Ok(other) => other.to_string(),
            Err(_) => String::new(),
        })
        .collect::<Vec<_>>();
    // Claim failures are non-fatal: the pipeline degrades to zero claimed jobs.
    let claimed_candidates = match state_db
        .claim_stage1_jobs_for_startup(
            session.conversation_id,
            PHASE_ONE_THREAD_SCAN_LIMIT,
            super::MAX_ROLLOUTS_PER_STARTUP,
            super::PHASE_ONE_MAX_ROLLOUT_AGE_DAYS,
            allowed_sources.as_slice(),
            super::PHASE_ONE_JOB_LEASE_SECONDS,
        )
        .await
    {
        Ok(claims) => claims,
        Err(err) => {
            warn!("state db claim_stage1_jobs_for_startup failed during memories startup: {err}");
            Vec::new()
        }
    };
    let claimed_count = claimed_candidates.len();
    let mut succeeded_count = 0;
    if claimed_count > 0 {
        // All stage-1 extraction requests share one request context derived
        // from a default turn of this session.
        let turn_context = session.new_default_turn().await;
        let stage_one_context = StageOneRequestContext::from_turn_context(
            turn_context.as_ref(),
            turn_context.resolve_turn_metadata_header().await,
        );
        // Run extractions concurrently, bounded by PHASE_ONE_CONCURRENCY_LIMIT;
        // each future resolves to whether the claimed job finished successfully.
        succeeded_count = futures::stream::iter(claimed_candidates.into_iter())
            .map(|claim| {
                let session = Arc::clone(session);
                let stage_one_context = stage_one_context.clone();
                async move {
                    let thread = claim.thread;
                    let stage_one_output = match extract::extract_stage_one_output(
                        session.as_ref(),
                        &thread.rollout_path,
                        &stage_one_context,
                    )
                    .await
                    {
                        Ok(output) => output,
                        Err(reason) => {
                            // Best-effort failure record so the job can retry later.
                            if let Some(state_db) = session.services.state_db.as_deref() {
                                let _ = state_db
                                    .mark_stage1_job_failed(
                                        thread.id,
                                        &claim.ownership_token,
                                        reason,
                                        super::PHASE_ONE_JOB_RETRY_DELAY_SECONDS,
                                    )
                                    .await;
                            }
                            return false;
                        }
                    };
                    // Re-resolve the DB handle inside the task; the session owns it.
                    let Some(state_db) = session.services.state_db.as_deref() else {
                        return false;
                    };
                    // Persist the output and release the claim; the thread's
                    // updated_at timestamp becomes the staleness watermark.
                    state_db
                        .mark_stage1_job_succeeded(
                            thread.id,
                            &claim.ownership_token,
                            thread.updated_at.timestamp(),
                            &stage_one_output.raw_memory,
                            &stage_one_output.summary,
                        )
                        .await
                        .unwrap_or(false)
                }
            })
            .buffer_unordered(super::PHASE_ONE_CONCURRENCY_LIMIT)
            .collect::<Vec<bool>>()
            .await
            .into_iter()
            .filter(|ok| *ok)
            .count();
    }
    info!(
        "memory stage-1 extraction complete: {} job(s) claimed, {} succeeded",
        claimed_count, succeeded_count
    );
    // Phase 2: fan out consolidation agents for every pending scope.
    let consolidation_scope_count = run_consolidation_dispatch(session, config).await;
    info!(
        "memory consolidation dispatch complete: {} scope(s) scheduled",
        consolidation_scope_count
    );
    Ok(())
}
/// Dispatches consolidation work for every pending scope, with bounded
/// concurrency, and returns how many scopes were scheduled.
async fn run_consolidation_dispatch(session: &Arc<Session>, config: Arc<Config>) -> usize {
    let scopes = list_consolidation_scopes(
        session.as_ref(),
        config.as_ref(),
        super::MAX_ROLLOUTS_PER_STARTUP,
    )
    .await;
    let scheduled = scopes.len();
    // Build one future per scope up front, then drive them concurrently.
    let jobs = scopes.into_iter().map(|scope| {
        let session = Arc::clone(session);
        let config = Arc::clone(&config);
        async move {
            dispatch::run_memory_consolidation_for_scope(session, config, scope).await;
        }
    });
    futures::stream::iter(jobs)
        .buffer_unordered(super::PHASE_TWO_CONCURRENCY_LIMIT)
        .collect::<Vec<_>>()
        .await;
    scheduled
}
/// Lists up to `limit` consolidation scope targets that have pending work.
///
/// Returns an empty list when the limit is zero, the state DB is unavailable,
/// or the pending-scope query fails.
async fn list_consolidation_scopes(
    session: &Session,
    config: &Config,
    limit: usize,
) -> Vec<MemoryScopeTarget> {
    if limit == 0 {
        return Vec::new();
    }
    let Some(state_db) = session.services.state_db.as_deref() else {
        return Vec::new();
    };
    state_db
        .list_pending_scope_consolidations(limit)
        .await
        .map(|pending| {
            // Drop rows whose scope kind/key cannot be mapped to a target.
            pending
                .into_iter()
                .filter_map(|scope| memory_scope_target_for_pending_scope(config, scope))
                .collect()
        })
        .unwrap_or_default()
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::test_config;
    use std::path::PathBuf;
    /// Verifies that phase-2 pending scope rows are translated only for supported scopes.
    #[test]
    fn pending_scope_mapping_accepts_supported_scopes_only() {
        let mut config = test_config();
        config.codex_home = PathBuf::from("/tmp/memory-startup-test-home");
        // A cwd-scoped row maps onto a cwd-derived memory root.
        let cwd_target = memory_scope_target_for_pending_scope(
            &config,
            codex_state::PendingScopeConsolidation {
                scope_kind: MEMORY_SCOPE_KIND_CWD.to_string(),
                scope_key: "/tmp/project-a".to_string(),
            },
        )
        .expect("cwd scope should map");
        assert_eq!(cwd_target.scope_kind, MEMORY_SCOPE_KIND_CWD);
        // A user-scoped row maps only with the canonical user scope key.
        let user_target = memory_scope_target_for_pending_scope(
            &config,
            codex_state::PendingScopeConsolidation {
                scope_kind: MEMORY_SCOPE_KIND_USER.to_string(),
                scope_key: MEMORY_SCOPE_KEY_USER.to_string(),
            },
        )
        .expect("valid user scope should map");
        assert_eq!(user_target.scope_kind, MEMORY_SCOPE_KIND_USER);
        // Non-canonical user keys are rejected.
        assert!(
            memory_scope_target_for_pending_scope(
                &config,
                codex_state::PendingScopeConsolidation {
                    scope_kind: MEMORY_SCOPE_KIND_USER.to_string(),
                    scope_key: "unexpected-user-key".to_string(),
                },
            )
            .is_none()
        );
        // Unknown scope kinds are rejected.
        assert!(
            memory_scope_target_for_pending_scope(
                &config,
                codex_state::PendingScopeConsolidation {
                    scope_kind: "unknown".to_string(),
                    scope_key: "scope".to_string(),
                },
            )
            .is_none()
        );
    }
}

View file

@ -0,0 +1,188 @@
use crate::agent::AgentStatus;
use crate::agent::status::is_final as is_final_agent_status;
use crate::codex::Session;
use codex_protocol::ThreadId;
use std::time::Duration;
use tracing::debug;
use tracing::info;
use tracing::warn;
use super::super::PHASE_TWO_JOB_HEARTBEAT_SECONDS;
use super::super::PHASE_TWO_JOB_LEASE_SECONDS;
use super::super::PHASE_TWO_JOB_RETRY_DELAY_SECONDS;
use super::MemoryScopeTarget;
/// Spawns a background task that tracks a phase-2 consolidation agent to a
/// terminal status and finalizes its job row in the state DB.
///
/// While the agent runs, the task heartbeats the job lease every
/// `PHASE_TWO_JOB_HEARTBEAT_SECONDS`. On a terminal status the job is marked
/// succeeded or failed; if lease ownership is lost at any point, finalization
/// is skipped so the new owner can finish the job.
pub(super) fn spawn_phase2_completion_task(
    session: &Session,
    scope: MemoryScopeTarget,
    ownership_token: String,
    completion_watermark: i64,
    consolidation_agent_id: ThreadId,
) {
    let state_db = session.services.state_db.clone();
    let agent_control = session.services.agent_control.clone();
    tokio::spawn(async move {
        // No state DB means there is no job row to maintain.
        let Some(state_db) = state_db.as_deref() else {
            return;
        };
        // If we cannot observe the agent we cannot finalize it; mark the job
        // failed so another worker retries after the delay.
        let mut status_rx = match agent_control.subscribe_status(consolidation_agent_id).await {
            Ok(status_rx) => status_rx,
            Err(err) => {
                warn!(
                    "failed to subscribe to memory consolidation agent {} for scope {}:{}: {err}",
                    consolidation_agent_id, scope.scope_kind, scope.scope_key
                );
                let _ = state_db
                    .mark_phase2_job_failed(
                        scope.scope_kind,
                        &scope.scope_key,
                        &ownership_token,
                        "failed to subscribe to consolidation agent status",
                        PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
                    )
                    .await;
                return;
            }
        };
        let mut heartbeat_interval =
            tokio::time::interval(Duration::from_secs(PHASE_TWO_JOB_HEARTBEAT_SECONDS));
        // Skip (rather than burst) ticks missed while waiting on status changes.
        heartbeat_interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
        let final_status = loop {
            // Check the current value first so an already-terminal agent is
            // handled without waiting for a change or a heartbeat tick.
            let status = status_rx.borrow().clone();
            if is_final_agent_status(&status) {
                break status;
            }
            tokio::select! {
                changed = status_rx.changed() => {
                    if changed.is_err() {
                        // Sender dropped: treat the last observed status as final.
                        warn!(
                            "lost status updates for memory consolidation agent {} in scope {}:{}",
                            consolidation_agent_id, scope.scope_kind, scope.scope_key
                        );
                        break status;
                    }
                }
                _ = heartbeat_interval.tick() => {
                    match state_db
                        .heartbeat_phase2_job(
                            scope.scope_kind,
                            &scope.scope_key,
                            &ownership_token,
                            PHASE_TWO_JOB_LEASE_SECONDS,
                        )
                        .await
                    {
                        Ok(true) => {}
                        Ok(false) => {
                            // Another worker owns the job now; stop silently.
                            debug!(
                                "memory phase-2 heartbeat lost ownership for scope {}:{}; skipping finalization",
                                scope.scope_kind, scope.scope_key
                            );
                            return;
                        }
                        Err(err) => {
                            warn!(
                                "state db heartbeat_phase2_job failed during memories startup: {err}"
                            );
                            return;
                        }
                    }
                }
            }
        };
        if is_phase2_success(&final_status) {
            // Record success; Ok(false) means ownership changed underneath us.
            match state_db
                .mark_phase2_job_succeeded(
                    scope.scope_kind,
                    &scope.scope_key,
                    &ownership_token,
                    completion_watermark,
                )
                .await
            {
                Ok(true) => {}
                Ok(false) => {
                    debug!(
                        "memory phase-2 success finalization skipped after ownership changed: scope={} scope_key={}",
                        scope.scope_kind, scope.scope_key
                    );
                }
                Err(err) => {
                    warn!(
                        "state db mark_phase2_job_succeeded failed during memories startup: {err}"
                    );
                }
            }
            info!(
                "memory phase-2 consolidation agent finished: scope={} scope_key={} agent_id={} final_status={final_status:?}",
                scope.scope_kind, scope.scope_key, consolidation_agent_id
            );
            return;
        }
        // Non-success terminal status: record the failure with a retry delay.
        let failure_reason = phase2_failure_reason(&final_status);
        match state_db
            .mark_phase2_job_failed(
                scope.scope_kind,
                &scope.scope_key,
                &ownership_token,
                &failure_reason,
                PHASE_TWO_JOB_RETRY_DELAY_SECONDS,
            )
            .await
        {
            Ok(true) => {}
            Ok(false) => {
                debug!(
                    "memory phase-2 failure finalization skipped after ownership changed: scope={} scope_key={}",
                    scope.scope_kind, scope.scope_key
                );
            }
            Err(err) => {
                warn!("state db mark_phase2_job_failed failed during memories startup: {err}");
            }
        }
        warn!(
            "memory phase-2 consolidation agent finished with non-success status: scope={} scope_key={} agent_id={} final_status={final_status:?}",
            scope.scope_kind, scope.scope_key, consolidation_agent_id
        );
    });
}
/// Returns true when the agent's terminal status is `Completed`; every other
/// terminal status counts as a phase-2 failure.
fn is_phase2_success(final_status: &AgentStatus) -> bool {
    match final_status {
        AgentStatus::Completed(_) => true,
        _ => false,
    }
}
/// Builds the human-readable failure reason recorded when a consolidation
/// agent ends in a non-success status.
fn phase2_failure_reason(final_status: &AgentStatus) -> String {
    let status_text = format!("{final_status:?}");
    let mut reason = String::from("consolidation agent finished with status ");
    reason.push_str(&status_text);
    reason
}
#[cfg(test)]
mod tests {
    use super::is_phase2_success;
    use super::phase2_failure_reason;
    use crate::agent::AgentStatus;
    /// Only a terminal `Completed` status counts as phase-2 success.
    #[test]
    fn phase2_success_only_for_completed_status() {
        assert!(is_phase2_success(&AgentStatus::Completed(None)));
        assert!(!is_phase2_success(&AgentStatus::Running));
        assert!(!is_phase2_success(&AgentStatus::Errored(
            "oops".to_string()
        )));
    }
    /// The recorded failure reason embeds the debug form of the final status.
    #[test]
    fn phase2_failure_reason_includes_status() {
        let status = AgentStatus::Errored("boom".to_string());
        let reason = phase2_failure_reason(&status);
        assert!(reason.contains("consolidation agent finished with status"));
        assert!(reason.contains("boom"));
    }
}

View file

@ -1,48 +1,32 @@
use codex_state::ThreadMemory;
use codex_state::Stage1Output;
use std::collections::BTreeSet;
use std::fmt::Write as _;
use std::path::Path;
use std::path::PathBuf;
use tracing::warn;
use super::LEGACY_CONSOLIDATED_FILENAME;
use super::MAX_RAW_MEMORIES_PER_SCOPE;
use super::MEMORY_REGISTRY_FILENAME;
use super::SKILLS_SUBDIR;
use super::ensure_layout;
use super::memory_summary_file;
use super::raw_memories_dir;
/// Prunes stale raw memory files and rebuilds the routing summary for recent memories.
///
/// Keeps on disk only the first `MAX_RAW_MEMORIES_PER_SCOPE` entries of
/// `memories` (assumes the slice is ordered newest-first — TODO confirm at
/// call sites), then regenerates the summary index from the full slice.
pub(crate) async fn prune_to_recent_memories_and_rebuild_summary(
    root: &Path,
    memories: &[ThreadMemory],
) -> std::io::Result<()> {
    ensure_layout(root).await?;
    // Thread ids whose raw memory files survive the prune.
    let keep = memories
        .iter()
        .take(MAX_RAW_MEMORIES_PER_SCOPE)
        .map(|memory| memory.thread_id.to_string())
        .collect::<BTreeSet<_>>();
    prune_raw_memories(root, &keep).await?;
    rebuild_memory_summary(root, memories).await
}
use super::text::compact_whitespace;
use crate::memories::layout::LEGACY_CONSOLIDATED_FILENAME;
use crate::memories::layout::MEMORY_REGISTRY_FILENAME;
use crate::memories::layout::SKILLS_SUBDIR;
use crate::memories::layout::ensure_layout;
use crate::memories::layout::memory_summary_file;
use crate::memories::layout::raw_memories_dir;
/// Rebuild `memory_summary.md` for a scope without pruning raw memory files.
pub(crate) async fn rebuild_memory_summary_from_memories(
pub(super) async fn rebuild_memory_summary_from_memories(
root: &Path,
memories: &[ThreadMemory],
memories: &[Stage1Output],
) -> std::io::Result<()> {
ensure_layout(root).await?;
rebuild_memory_summary(root, memories).await
}
/// Syncs canonical raw memory files from DB-backed memory rows.
pub(crate) async fn sync_raw_memories_from_memories(
pub(super) async fn sync_raw_memories_from_memories(
root: &Path,
memories: &[ThreadMemory],
memories: &[Stage1Output],
) -> std::io::Result<()> {
ensure_layout(root).await?;
@ -65,7 +49,7 @@ pub(crate) async fn sync_raw_memories_from_memories(
/// Clears consolidation outputs so a fresh consolidation run can regenerate them.
///
/// Phase-1 artifacts (`raw_memories/` and `memory_summary.md`) are preserved.
pub(crate) async fn wipe_consolidation_outputs(root: &Path) -> std::io::Result<()> {
pub(super) async fn wipe_consolidation_outputs(root: &Path) -> std::io::Result<()> {
for file_name in [MEMORY_REGISTRY_FILENAME, LEGACY_CONSOLIDATED_FILENAME] {
let path = root.join(file_name);
if let Err(err) = tokio::fs::remove_file(&path).await
@ -91,7 +75,7 @@ pub(crate) async fn wipe_consolidation_outputs(root: &Path) -> std::io::Result<(
Ok(())
}
async fn rebuild_memory_summary(root: &Path, memories: &[ThreadMemory]) -> std::io::Result<()> {
async fn rebuild_memory_summary(root: &Path, memories: &[Stage1Output]) -> std::io::Result<()> {
let mut body = String::from("# Memory Summary\n\n");
if memories.is_empty() {
@ -101,7 +85,7 @@ async fn rebuild_memory_summary(root: &Path, memories: &[ThreadMemory]) -> std::
body.push_str("Map of concise summaries to thread IDs (latest first):\n\n");
for memory in memories.iter().take(MAX_RAW_MEMORIES_PER_SCOPE) {
let summary = compact_summary_for_index(&memory.memory_summary);
let summary = compact_whitespace(&memory.summary);
writeln!(body, "- {summary} (thread: `{}`)", memory.thread_id)
.map_err(|err| std::io::Error::other(format!("format memory summary: {err}")))?;
}
@ -178,7 +162,7 @@ async fn remove_outdated_thread_raw_memories(
async fn write_raw_memory_for_thread(
root: &Path,
memory: &ThreadMemory,
memory: &Stage1Output,
) -> std::io::Result<PathBuf> {
let path = raw_memories_dir(root).join(format!("{}.md", memory.thread_id));
@ -187,8 +171,12 @@ async fn write_raw_memory_for_thread(
let mut body = String::new();
writeln!(body, "thread_id: {}", memory.thread_id)
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
writeln!(body, "updated_at: {}", memory.updated_at.to_rfc3339())
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
writeln!(
body,
"updated_at: {}",
memory.source_updated_at.to_rfc3339()
)
.map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
writeln!(body).map_err(|err| std::io::Error::other(format!("format raw memory: {err}")))?;
body.push_str(memory.raw_memory.trim());
body.push('\n');
@ -197,10 +185,6 @@ async fn write_raw_memory_for_thread(
Ok(path)
}
/// Collapses runs of whitespace in `summary` to single spaces and trims the
/// ends, producing a one-line string for the summary index.
fn compact_summary_for_index(summary: &str) -> String {
    let mut compact = String::with_capacity(summary.len());
    for word in summary.split_whitespace() {
        if !compact.is_empty() {
            compact.push(' ');
        }
        compact.push_str(word);
    }
    compact
}
fn extract_thread_id_from_summary_filename(file_name: &str) -> Option<&str> {
let stem = file_name.strip_suffix(".md")?;
if stem.is_empty() {

View file

@ -1,17 +1,14 @@
use super::MEMORY_SCOPE_KIND_CWD;
use super::PHASE_ONE_MAX_ROLLOUT_AGE_DAYS;
use super::StageOneResponseItemKinds;
use super::StageOneRolloutFilter;
use super::ensure_layout;
use super::memory_root_for_cwd;
use super::memory_scope_key_for_cwd;
use super::memory_summary_file;
use super::parse_stage_one_output;
use super::prune_to_recent_memories_and_rebuild_summary;
use super::raw_memories_dir;
use super::select_rollout_candidates_from_db;
use super::serialize_filtered_rollout_response_items;
use super::wipe_consolidation_outputs;
use super::rollout::StageOneResponseItemKinds;
use super::rollout::StageOneRolloutFilter;
use super::rollout::serialize_filtered_rollout_response_items;
use super::stage_one::parse_stage_one_output;
use super::storage::rebuild_memory_summary_from_memories;
use super::storage::sync_raw_memories_from_memories;
use super::storage::wipe_consolidation_outputs;
use crate::memories::layout::ensure_layout;
use crate::memories::layout::memory_root_for_cwd;
use crate::memories::layout::memory_summary_file;
use crate::memories::layout::raw_memories_dir;
use chrono::TimeZone;
use chrono::Utc;
use codex_protocol::ThreadId;
@ -19,44 +16,10 @@ use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseItem;
use codex_protocol::protocol::CompactedItem;
use codex_protocol::protocol::RolloutItem;
use codex_state::ThreadMemory;
use codex_state::ThreadMetadata;
use codex_state::Stage1Output;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
use tempfile::tempdir;
/// Test helper: builds a `ThreadMetadata` with fixed placeholder fields and
/// both timestamps set to `updated_at_secs`.
fn thread_metadata(
    thread_id: ThreadId,
    path: PathBuf,
    cwd: PathBuf,
    title: &str,
    updated_at_secs: i64,
) -> ThreadMetadata {
    // Panics on out-of-range seconds; acceptable inside tests.
    let updated_at = Utc
        .timestamp_opt(updated_at_secs, 0)
        .single()
        .expect("timestamp");
    ThreadMetadata {
        id: thread_id,
        rollout_path: path,
        created_at: updated_at,
        updated_at,
        source: "cli".to_string(),
        model_provider: "openai".to_string(),
        cwd,
        cli_version: "test".to_string(),
        title: title.to_string(),
        sandbox_policy: "read_only".to_string(),
        approval_mode: "on_request".to_string(),
        tokens_used: 0,
        first_user_message: None,
        archived_at: None,
        git_branch: None,
        git_sha: None,
        git_origin_url: None,
    }
}
#[test]
fn memory_root_varies_by_cwd() {
let dir = tempdir().expect("tempdir");
@ -100,22 +63,6 @@ fn memory_root_encoding_avoids_component_collisions() {
assert!(!root_hash.display().to_string().contains("workspace"));
}
#[test]
// Scope keys must be derived from the canonicalized cwd, so an aliased path
// (`workspace/nested/..`) and the resolved path yield the same key.
fn memory_scope_key_uses_normalized_cwd() {
    let dir = tempdir().expect("tempdir");
    let workspace = dir.path().join("workspace");
    std::fs::create_dir_all(&workspace).expect("mkdir workspace");
    std::fs::create_dir_all(workspace.join("nested")).expect("mkdir nested");
    // Alias that resolves back to `workspace` via a `..` component.
    let alias = workspace.join("nested").join("..");
    let normalized = workspace
        .canonicalize()
        .expect("canonical workspace path should resolve");
    let alias_key = memory_scope_key_for_cwd(&alias);
    let normalized_key = memory_scope_key_for_cwd(&normalized);
    assert_eq!(alias_key, normalized_key);
}
#[test]
fn parse_stage_one_output_accepts_fenced_json() {
let raw = "```json\n{\"rawMemory\":\"abc\",\"summary\":\"short\"}\n```";
@ -224,61 +171,6 @@ fn serialize_filtered_rollout_response_items_filters_by_response_item_kind() {
assert!(matches!(parsed[0], ResponseItem::Message { .. }));
}
#[test]
// Candidate selection must drop the current thread and anything older than
// the age window, keeping the remaining threads in input order.
// NOTE(review): relies on `ThreadId::default()` producing distinct ids per
// call — confirm against the ThreadId implementation.
fn select_rollout_candidates_filters_by_age_window() {
    let dir = tempdir().expect("tempdir");
    let cwd_a = dir.path().join("workspace-a");
    let cwd_b = dir.path().join("workspace-b");
    std::fs::create_dir_all(&cwd_a).expect("mkdir cwd a");
    std::fs::create_dir_all(&cwd_b).expect("mkdir cwd b");
    let now = Utc::now().timestamp();
    let current_thread_id = ThreadId::default();
    let recent_thread_id = ThreadId::default();
    let old_thread_id = ThreadId::default();
    let recent_two_thread_id = ThreadId::default();
    // Excluded: this is the thread running the pipeline.
    let current = thread_metadata(
        current_thread_id,
        dir.path().join("current.jsonl"),
        cwd_a.clone(),
        "current",
        now,
    );
    let recent = thread_metadata(
        recent_thread_id,
        dir.path().join("recent.jsonl"),
        cwd_a,
        "recent",
        now - 10,
    );
    // Excluded: one day past the maximum rollout age.
    let old = thread_metadata(
        old_thread_id,
        dir.path().join("old.jsonl"),
        cwd_b.clone(),
        "old",
        now - (PHASE_ONE_MAX_ROLLOUT_AGE_DAYS + 1) * 24 * 60 * 60,
    );
    let recent_two = thread_metadata(
        recent_two_thread_id,
        dir.path().join("recent-two.jsonl"),
        cwd_b,
        "recent-two",
        now - 20,
    );
    let candidates = select_rollout_candidates_from_db(
        &[current, recent, old, recent_two],
        current_thread_id,
        5,
        PHASE_ONE_MAX_ROLLOUT_AGE_DAYS,
    );
    assert_eq!(candidates.len(), 2);
    assert_eq!(candidates[0].thread_id, recent_thread_id);
    assert_eq!(candidates[1].thread_id, recent_two_thread_id);
}
#[tokio::test]
async fn prune_and_rebuild_summary_keeps_latest_memories_only() {
let dir = tempdir().expect("tempdir");
@ -296,22 +188,20 @@ async fn prune_and_rebuild_summary_keeps_latest_memories_only() {
.await
.expect("write drop");
let memories = vec![ThreadMemory {
let memories = vec![Stage1Output {
thread_id: ThreadId::try_from(keep_id.clone()).expect("thread id"),
scope_kind: MEMORY_SCOPE_KIND_CWD.to_string(),
scope_key: "scope".to_string(),
source_updated_at: Utc.timestamp_opt(100, 0).single().expect("timestamp"),
raw_memory: "raw memory".to_string(),
memory_summary: "short summary".to_string(),
updated_at: Utc.timestamp_opt(100, 0).single().expect("timestamp"),
last_used_at: None,
used_count: 0,
invalidated_at: None,
invalid_reason: None,
summary: "short summary".to_string(),
generated_at: Utc.timestamp_opt(101, 0).single().expect("timestamp"),
}];
prune_to_recent_memories_and_rebuild_summary(&root, &memories)
sync_raw_memories_from_memories(&root, &memories)
.await
.expect("prune and rebuild");
.expect("sync raw memories");
rebuild_memory_summary_from_memories(&root, &memories)
.await
.expect("rebuild memory summary");
assert!(keep_path.is_file());
assert!(!drop_path.exists());

View file

@ -0,0 +1,50 @@
/// Collapses every run of whitespace in `input` into a single space and trims
/// leading/trailing whitespace.
pub(super) fn compact_whitespace(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    for word in input.split_whitespace() {
        if !out.is_empty() {
            out.push(' ');
        }
        out.push_str(word);
    }
    out
}
pub(super) fn truncate_text_for_storage(input: &str, max_bytes: usize, marker: &str) -> String {
if input.len() <= max_bytes {
return input.to_string();
}
let budget_without_marker = max_bytes.saturating_sub(marker.len());
let head_budget = budget_without_marker / 2;
let tail_budget = budget_without_marker.saturating_sub(head_budget);
let head = prefix_at_char_boundary(input, head_budget);
let tail = suffix_at_char_boundary(input, tail_budget);
format!("{head}{marker}{tail}")
}
/// Returns the longest prefix of `input` that fits in `max_bytes` bytes
/// without splitting a UTF-8 character.
pub(super) fn prefix_at_char_boundary(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Walk the cut point down until it lands on a char boundary.
    let mut end = max_bytes;
    while !input.is_char_boundary(end) {
        end -= 1;
    }
    &input[..end]
}
/// Returns the longest suffix of `input` that fits in `max_bytes` bytes
/// without splitting a UTF-8 character.
pub(super) fn suffix_at_char_boundary(input: &str, max_bytes: usize) -> &str {
    if max_bytes >= input.len() {
        return input;
    }
    // Walk the cut point up until it lands on a char boundary.
    let mut start = input.len() - max_bytes;
    while !input.is_char_boundary(start) {
        start += 1;
    }
    &input[start..]
}

View file

@ -1,26 +1,11 @@
use codex_protocol::ThreadId;
use serde::Deserialize;
use std::path::PathBuf;
/// A rollout selected for stage-1 memory extraction during startup.
///
/// Fields mirror the persisted thread metadata needed to locate the rollout
/// file and bucket its memories by project.
#[derive(Debug, Clone)]
pub(crate) struct RolloutCandidate {
    /// Source thread identifier for this rollout.
    pub(crate) thread_id: ThreadId,
    /// Absolute path to the rollout file to summarize.
    pub(crate) rollout_path: PathBuf,
    /// Thread working directory used for per-project memory bucketing.
    pub(crate) cwd: PathBuf,
    /// Last observed thread update timestamp (RFC3339), if available.
    pub(crate) updated_at: Option<String>,
}
/// Parsed stage-1 model output payload.
#[derive(Debug, Clone, Deserialize)]
pub(crate) struct StageOneOutput {
pub(super) struct StageOneOutput {
/// Detailed markdown raw memory for a single rollout.
#[serde(rename = "rawMemory", alias = "traceMemory")]
pub(crate) raw_memory: String,
pub(super) raw_memory: String,
/// Compact summary line used for routing and indexing.
pub(crate) summary: String,
pub(super) summary: String,
}

View file

@ -315,64 +315,6 @@ pub async fn persist_dynamic_tools(
}
}
/// Get memory summaries for a thread id using SQLite.
///
/// Returns `None` when no state runtime is available, when the lookup fails
/// (logged with `stage`), or when no memory row exists for the thread.
pub async fn get_thread_memory(
    context: Option<&codex_state::StateRuntime>,
    thread_id: ThreadId,
    stage: &str,
) -> Option<codex_state::ThreadMemory> {
    let ctx = context?;
    ctx.get_thread_memory(thread_id)
        .await
        .map_err(|err| warn!("state db get_thread_memory failed during {stage}: {err}"))
        .ok()
        .flatten()
}
/// Upsert memory summaries for a thread id using SQLite.
///
/// Returns the stored row, or `None` when no state runtime is available or
/// the write fails (logged with `stage`).
pub async fn upsert_thread_memory(
    context: Option<&codex_state::StateRuntime>,
    thread_id: ThreadId,
    raw_memory: &str,
    memory_summary: &str,
    stage: &str,
) -> Option<codex_state::ThreadMemory> {
    let ctx = context?;
    ctx.upsert_thread_memory(thread_id, raw_memory, memory_summary)
        .await
        .map_err(|err| warn!("state db upsert_thread_memory failed during {stage}: {err}"))
        .ok()
}
/// Get the last N memories corresponding to a cwd using an exact path match.
///
/// The cwd is normalized before querying. Returns `None` when no state
/// runtime is available or the query fails (logged with `stage`).
pub async fn get_last_n_thread_memories_for_cwd(
    context: Option<&codex_state::StateRuntime>,
    cwd: &Path,
    n: usize,
    stage: &str,
) -> Option<Vec<codex_state::ThreadMemory>> {
    let ctx = context?;
    let normalized_cwd = normalize_cwd_for_state_db(cwd);
    ctx.get_last_n_thread_memories_for_cwd(&normalized_cwd, n)
        .await
        .map_err(|err| {
            warn!("state db get_last_n_thread_memories_for_cwd failed during {stage}: {err}")
        })
        .ok()
}
/// Reconcile rollout items into SQLite, falling back to scanning the rollout file.
pub async fn reconcile_rollout(
context: Option<&codex_state::StateRuntime>,

View file

@ -0,0 +1,37 @@
-- Drop the legacy per-thread memory and phase-1/phase-2 job tables; this
-- migration replaces them with the unified schema below.
DROP TABLE IF EXISTS thread_memory;
DROP TABLE IF EXISTS memory_phase1_jobs;
DROP TABLE IF EXISTS memory_scope_dirty;
DROP TABLE IF EXISTS memory_phase2_jobs;
DROP TABLE IF EXISTS memory_consolidation_locks;
-- Stage-1 extraction output, one row per source thread.
-- source_updated_at records the thread's updated_at at extraction time.
CREATE TABLE IF NOT EXISTS stage1_outputs (
    thread_id TEXT PRIMARY KEY,
    source_updated_at INTEGER NOT NULL,
    raw_memory TEXT NOT NULL,
    summary TEXT NOT NULL,
    generated_at INTEGER NOT NULL,
    FOREIGN KEY(thread_id) REFERENCES threads(id) ON DELETE CASCADE
);
-- Supports latest-first scans of stage-1 outputs.
CREATE INDEX IF NOT EXISTS idx_stage1_outputs_source_updated_at
ON stage1_outputs(source_updated_at DESC, thread_id DESC);
-- Generic leased-job table keyed by (kind, job_key); ownership_token and
-- lease_until implement job leasing, retry_at/retry_remaining drive retries.
CREATE TABLE IF NOT EXISTS jobs (
    kind TEXT NOT NULL,
    job_key TEXT NOT NULL,
    status TEXT NOT NULL,
    worker_id TEXT,
    ownership_token TEXT,
    started_at INTEGER,
    finished_at INTEGER,
    lease_until INTEGER,
    retry_at INTEGER,
    retry_remaining INTEGER NOT NULL,
    last_error TEXT,
    input_watermark INTEGER,
    last_success_watermark INTEGER,
    PRIMARY KEY (kind, job_key)
);
-- Speeds up claim queries filtering on kind/status/retry/lease columns.
CREATE INDEX IF NOT EXISTS idx_jobs_kind_status_retry_lease
ON jobs(kind, status, retry_at, lease_until);

View file

@ -27,15 +27,16 @@ pub use model::BackfillStats;
pub use model::BackfillStatus;
pub use model::ExtractionOutcome;
pub use model::SortKey;
pub use model::ThreadMemory;
pub use model::Stage1Output;
pub use model::ThreadMetadata;
pub use model::ThreadMetadataBuilder;
pub use model::ThreadsPage;
pub use runtime::DirtyMemoryScope;
pub use runtime::Phase1JobClaimOutcome;
pub use runtime::PendingScopeConsolidation;
pub use runtime::Phase2JobClaimOutcome;
pub use runtime::STATE_DB_FILENAME;
pub use runtime::STATE_DB_VERSION;
pub use runtime::Stage1JobClaim;
pub use runtime::Stage1JobClaimOutcome;
pub use runtime::state_db_filename;
pub use runtime::state_db_path;

View file

@ -1,6 +1,6 @@
mod backfill_state;
mod log;
mod thread_memory;
mod stage1_output;
mod thread_metadata;
pub use backfill_state::BackfillState;
@ -8,7 +8,7 @@ pub use backfill_state::BackfillStatus;
pub use log::LogEntry;
pub use log::LogQuery;
pub use log::LogRow;
pub use thread_memory::ThreadMemory;
pub use stage1_output::Stage1Output;
pub use thread_metadata::Anchor;
pub use thread_metadata::BackfillStats;
pub use thread_metadata::ExtractionOutcome;
@ -17,7 +17,7 @@ pub use thread_metadata::ThreadMetadata;
pub use thread_metadata::ThreadMetadataBuilder;
pub use thread_metadata::ThreadsPage;
pub(crate) use thread_memory::ThreadMemoryRow;
pub(crate) use stage1_output::Stage1OutputRow;
pub(crate) use thread_metadata::ThreadRow;
pub(crate) use thread_metadata::anchor_from_item;
pub(crate) use thread_metadata::datetime_to_epoch_seconds;

View file

@ -0,0 +1,56 @@
use anyhow::Result;
use chrono::DateTime;
use chrono::Utc;
use codex_protocol::ThreadId;
use sqlx::Row;
use sqlx::sqlite::SqliteRow;
/// Stored stage-1 memory extraction output for a single thread.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Stage1Output {
    /// Source thread the extraction was produced from.
    pub thread_id: ThreadId,
    /// The source thread's `updated_at` as observed at extraction time.
    pub source_updated_at: DateTime<Utc>,
    /// Detailed memory body extracted from the rollout.
    pub raw_memory: String,
    /// Compact summary line used for indexing.
    pub summary: String,
    /// When this stage-1 output row was generated.
    pub generated_at: DateTime<Utc>,
}
/// Raw SQLite row image of [`Stage1Output`], prior to type conversion.
/// Timestamps are kept as unix epoch seconds and the thread id as text.
#[derive(Debug)]
pub(crate) struct Stage1OutputRow {
    thread_id: String,
    source_updated_at: i64,
    raw_memory: String,
    summary: String,
    generated_at: i64,
}
impl Stage1OutputRow {
    /// Reads one `stage1_outputs` row by column name.
    ///
    /// Errors when a column is missing or has an unexpected SQLite type.
    pub(crate) fn try_from_row(row: &SqliteRow) -> Result<Self> {
        Ok(Self {
            thread_id: row.try_get("thread_id")?,
            source_updated_at: row.try_get("source_updated_at")?,
            raw_memory: row.try_get("raw_memory")?,
            summary: row.try_get("summary")?,
            generated_at: row.try_get("generated_at")?,
        })
    }
}
impl TryFrom<Stage1OutputRow> for Stage1Output {
    type Error = anyhow::Error;
    /// Converts the raw row into the typed model.
    ///
    /// Errors when the stored thread id fails to parse or either timestamp is
    /// outside the representable range.
    fn try_from(row: Stage1OutputRow) -> std::result::Result<Self, Self::Error> {
        Ok(Self {
            thread_id: ThreadId::try_from(row.thread_id)?,
            source_updated_at: epoch_seconds_to_datetime(row.source_updated_at)?,
            raw_memory: row.raw_memory,
            summary: row.summary,
            generated_at: epoch_seconds_to_datetime(row.generated_at)?,
        })
    }
}
/// Converts unix epoch seconds into a UTC timestamp, rejecting values outside
/// chrono's representable range.
fn epoch_seconds_to_datetime(secs: i64) -> Result<DateTime<Utc>> {
    match DateTime::<Utc>::from_timestamp(secs, 0) {
        Some(timestamp) => Ok(timestamp),
        None => Err(anyhow::anyhow!("invalid unix timestamp: {secs}")),
    }
}

View file

@ -1,82 +0,0 @@
use anyhow::Result;
use chrono::DateTime;
use chrono::Utc;
use codex_protocol::ThreadId;
use sqlx::Row;
use sqlx::sqlite::SqliteRow;
/// Stored memory summaries for a single thread.
///
/// NOTE(review): the usage/invalidation field semantics below are inferred
/// from the column names — confirm against the writers of this table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ThreadMemory {
    /// Source thread the memory belongs to.
    pub thread_id: ThreadId,
    /// Scope bucket kind for this memory.
    pub scope_kind: String,
    /// Key identifying the concrete scope within `scope_kind`.
    pub scope_key: String,
    /// Detailed memory body.
    pub raw_memory: String,
    /// Compact summary line.
    pub memory_summary: String,
    /// Row update timestamp.
    pub updated_at: DateTime<Utc>,
    /// Last-use timestamp, if any.
    pub last_used_at: Option<DateTime<Utc>>,
    /// Use counter.
    pub used_count: i64,
    /// Invalidation timestamp, if invalidated.
    pub invalidated_at: Option<DateTime<Utc>>,
    /// Invalidation reason, if invalidated.
    pub invalid_reason: Option<String>,
}
/// Raw SQLite row image of [`ThreadMemory`], prior to type conversion.
/// Timestamps are unix epoch seconds; the thread id is stored as text.
#[derive(Debug)]
pub(crate) struct ThreadMemoryRow {
    thread_id: String,
    scope_kind: String,
    scope_key: String,
    raw_memory: String,
    memory_summary: String,
    updated_at: i64,
    last_used_at: Option<i64>,
    used_count: i64,
    invalidated_at: Option<i64>,
    invalid_reason: Option<String>,
}
impl ThreadMemoryRow {
    /// Reads one thread-memory row by column name.
    ///
    /// Errors when a column is missing or has an unexpected SQLite type.
    pub(crate) fn try_from_row(row: &SqliteRow) -> Result<Self> {
        Ok(Self {
            thread_id: row.try_get("thread_id")?,
            scope_kind: row.try_get("scope_kind")?,
            scope_key: row.try_get("scope_key")?,
            raw_memory: row.try_get("raw_memory")?,
            memory_summary: row.try_get("memory_summary")?,
            updated_at: row.try_get("updated_at")?,
            last_used_at: row.try_get("last_used_at")?,
            used_count: row.try_get("used_count")?,
            invalidated_at: row.try_get("invalidated_at")?,
            invalid_reason: row.try_get("invalid_reason")?,
        })
    }
}
impl TryFrom<ThreadMemoryRow> for ThreadMemory {
    type Error = anyhow::Error;
    /// Converts the raw row into the typed model.
    ///
    /// Errors when the stored thread id fails to parse or any timestamp is
    /// outside the representable range; optional timestamps convert only when
    /// present.
    fn try_from(row: ThreadMemoryRow) -> std::result::Result<Self, Self::Error> {
        Ok(Self {
            thread_id: ThreadId::try_from(row.thread_id)?,
            scope_kind: row.scope_kind,
            scope_key: row.scope_key,
            raw_memory: row.raw_memory,
            memory_summary: row.memory_summary,
            updated_at: epoch_seconds_to_datetime(row.updated_at)?,
            last_used_at: row
                .last_used_at
                .map(epoch_seconds_to_datetime)
                .transpose()?,
            used_count: row.used_count,
            invalidated_at: row
                .invalidated_at
                .map(epoch_seconds_to_datetime)
                .transpose()?,
            invalid_reason: row.invalid_reason,
        })
    }
}
/// Converts unix epoch seconds into a UTC timestamp.
///
/// Fails when `secs` is outside chrono's representable range.
fn epoch_seconds_to_datetime(secs: i64) -> Result<DateTime<Utc>> {
    DateTime::<Utc>::from_timestamp(secs, 0)
        .ok_or_else(|| anyhow::anyhow!("invalid unix timestamp: {secs}"))
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,800 @@
use super::*;
use crate::Stage1Output;
use crate::model::Stage1OutputRow;
use chrono::Duration;
use sqlx::Executor;
use sqlx::Sqlite;
use std::collections::HashSet;
use std::path::Path;
use std::path::PathBuf;
/// `jobs.kind` value for stage-1 extraction jobs.
const JOB_KIND_MEMORY_STAGE1: &str = "memory_stage1";
/// `jobs.kind` value for per-cwd consolidation jobs.
const JOB_KIND_MEMORY_CONSOLIDATE_CWD: &str = "memory_consolidate_cwd";
/// `jobs.kind` value for user-scope consolidation jobs.
const JOB_KIND_MEMORY_CONSOLIDATE_USER: &str = "memory_consolidate_user";
// Presumably seeds `jobs.retry_remaining` for new rows — TODO confirm usage.
const DEFAULT_RETRY_REMAINING: i64 = 3;
fn job_kind_for_scope(scope_kind: &str) -> Option<&'static str> {
match scope_kind {
MEMORY_SCOPE_KIND_CWD => Some(JOB_KIND_MEMORY_CONSOLIDATE_CWD),
MEMORY_SCOPE_KIND_USER => Some(JOB_KIND_MEMORY_CONSOLIDATE_USER),
_ => None,
}
}
fn scope_kind_for_job_kind(job_kind: &str) -> Option<&'static str> {
match job_kind {
JOB_KIND_MEMORY_CONSOLIDATE_CWD => Some(MEMORY_SCOPE_KIND_CWD),
JOB_KIND_MEMORY_CONSOLIDATE_USER => Some(MEMORY_SCOPE_KIND_USER),
_ => None,
}
}
/// Canonicalizes a stored cwd string for scope comparison; returns `None`
/// when the path does not exist or cannot be resolved.
fn normalize_cwd_for_scope_matching(cwd: &str) -> Option<PathBuf> {
    let path = Path::new(cwd);
    match path.canonicalize() {
        Ok(resolved) => Some(resolved),
        Err(_) => None,
    }
}
impl StateRuntime {
    /// Claims up to `max_claimed` stage-1 extraction jobs for this startup.
    ///
    /// Scans the newest `scan_limit` threads (passing `allowed_sources`
    /// through to the listing query), skips the current thread and anything
    /// older than `max_age_days`, and tries to lease a stage-1 job for each
    /// remaining thread for `lease_seconds`. Only successfully claimed jobs
    /// are returned, together with their ownership tokens.
    pub async fn claim_stage1_jobs_for_startup(
        &self,
        current_thread_id: ThreadId,
        scan_limit: usize,
        max_claimed: usize,
        max_age_days: i64,
        allowed_sources: &[String],
        lease_seconds: i64,
    ) -> anyhow::Result<Vec<Stage1JobClaim>> {
        if scan_limit == 0 || max_claimed == 0 {
            return Ok(Vec::new());
        }
        let page = self
            .list_threads(
                scan_limit,
                None,
                SortKey::UpdatedAt,
                allowed_sources,
                None,
                false,
            )
            .await?;
        // Negative age windows are clamped to zero days.
        let cutoff = Utc::now() - Duration::days(max_age_days.max(0));
        let mut claimed = Vec::new();
        for item in page.items {
            if claimed.len() >= max_claimed {
                break;
            }
            // Never extract memories from the session running the pipeline.
            if item.id == current_thread_id {
                continue;
            }
            if item.updated_at < cutoff {
                continue;
            }
            // Non-Claimed outcomes (already leased/done) are skipped silently.
            if let Stage1JobClaimOutcome::Claimed { ownership_token } = self
                .try_claim_stage1_job(
                    item.id,
                    current_thread_id,
                    item.updated_at.timestamp(),
                    lease_seconds,
                )
                .await?
            {
                claimed.push(Stage1JobClaim {
                    thread: item,
                    ownership_token,
                });
            }
        }
        Ok(claimed)
    }
    /// Fetches the stored stage-1 output for `thread_id`, if any.
    ///
    /// Errors on query failure or when the stored row cannot be decoded into
    /// a `Stage1Output`.
    pub async fn get_stage1_output(
        &self,
        thread_id: ThreadId,
    ) -> anyhow::Result<Option<Stage1Output>> {
        let row = sqlx::query(
            r#"
            SELECT thread_id, source_updated_at, raw_memory, summary, generated_at
            FROM stage1_outputs
            WHERE thread_id = ?
            "#,
        )
        .bind(thread_id.to_string())
        .fetch_optional(self.pool.as_ref())
        .await?;
        // Decode only when a row exists; surface decode failures as errors.
        row.map(|row| Stage1OutputRow::try_from_row(&row).and_then(Stage1Output::try_from))
            .transpose()
    }
    /// Returns up to `n` stage-1 outputs for the given scope, newest first
    /// (by `source_updated_at`, then thread id).
    ///
    /// For the user scope this is the most recent outputs across all threads.
    /// For the cwd scope, threads are matched by exact `cwd` string equality;
    /// additionally every thread cwd is canonicalized and compared against the
    /// canonicalized scope key, and those matches are preferred when any
    /// exist. Unknown scope kinds yield an empty list.
    pub async fn list_stage1_outputs_for_scope(
        &self,
        scope_kind: &str,
        scope_key: &str,
        n: usize,
    ) -> anyhow::Result<Vec<Stage1Output>> {
        if n == 0 {
            return Ok(Vec::new());
        }
        let rows = match scope_kind {
            MEMORY_SCOPE_KIND_CWD => {
                // Exact string match on the stored cwd, limited in SQL.
                let exact_rows = sqlx::query(
                    r#"
                    SELECT so.thread_id, so.source_updated_at, so.raw_memory, so.summary, so.generated_at
                    FROM stage1_outputs AS so
                    JOIN threads AS t ON t.id = so.thread_id
                    WHERE t.cwd = ?
                    ORDER BY so.source_updated_at DESC, so.thread_id DESC
                    LIMIT ?
                    "#,
                )
                .bind(scope_key)
                .bind(n as i64)
                .fetch_all(self.pool.as_ref())
                .await?;
                if let Some(normalized_scope_key) = normalize_cwd_for_scope_matching(scope_key) {
                    // Canonicalized matching: path normalization happens in
                    // Rust, so this query has no LIMIT and scans all outputs;
                    // we stop collecting once `n` matches are found.
                    let mut rows = Vec::new();
                    let mut selected_thread_ids = HashSet::new();
                    let candidate_rows = sqlx::query(
                        r#"
                        SELECT so.thread_id, so.source_updated_at, so.raw_memory, so.summary, so.generated_at, t.cwd AS thread_cwd
                        FROM stage1_outputs AS so
                        JOIN threads AS t ON t.id = so.thread_id
                        ORDER BY so.source_updated_at DESC, so.thread_id DESC
                        "#,
                    )
                    .fetch_all(self.pool.as_ref())
                    .await?;
                    for row in candidate_rows {
                        if rows.len() >= n {
                            break;
                        }
                        let thread_id: String = row.try_get("thread_id")?;
                        // Never select the same thread twice.
                        if selected_thread_ids.contains(&thread_id) {
                            continue;
                        }
                        let thread_cwd: String = row.try_get("thread_cwd")?;
                        if let Some(normalized_thread_cwd) =
                            normalize_cwd_for_scope_matching(&thread_cwd)
                            && normalized_thread_cwd == normalized_scope_key
                        {
                            selected_thread_ids.insert(thread_id);
                            rows.push(row);
                        }
                    }
                    // Prefer canonicalized matches; fall back to exact string
                    // matches when canonicalization found nothing.
                    if rows.is_empty() { exact_rows } else { rows }
                } else {
                    // The scope key itself could not be canonicalized (e.g.
                    // the directory no longer exists): exact matches only.
                    exact_rows
                }
            }
            MEMORY_SCOPE_KIND_USER => {
                // User scope spans every thread.
                sqlx::query(
                    r#"
                    SELECT so.thread_id, so.source_updated_at, so.raw_memory, so.summary, so.generated_at
                    FROM stage1_outputs AS so
                    JOIN threads AS t ON t.id = so.thread_id
                    ORDER BY so.source_updated_at DESC, so.thread_id DESC
                    LIMIT ?
                    "#,
                )
                .bind(n as i64)
                .fetch_all(self.pool.as_ref())
                .await?
            }
            _ => return Ok(Vec::new()),
        };
        rows.into_iter()
            .map(|row| Stage1OutputRow::try_from_row(&row).and_then(Stage1Output::try_from))
            .collect::<Result<Vec<_>, _>>()
    }
    /// Attempts to claim the stage-1 job for `thread_id` on behalf of
    /// `worker_id`, for source data as of `source_updated_at`.
    ///
    /// Skips when the stored output already covers `source_updated_at`, when
    /// the retry budget is exhausted, when retry backoff has not elapsed, or
    /// when another worker holds an unexpired lease. Otherwise the job row is
    /// inserted (or atomically taken over) in the `running` state, and the
    /// returned ownership token authorizes the later success/failure updates.
    pub async fn try_claim_stage1_job(
        &self,
        thread_id: ThreadId,
        worker_id: ThreadId,
        source_updated_at: i64,
        lease_seconds: i64,
    ) -> anyhow::Result<Stage1JobClaimOutcome> {
        let now = Utc::now().timestamp();
        // Negative lease durations are clamped to zero.
        let lease_until = now.saturating_add(lease_seconds.max(0));
        // Fresh token identifying this particular claim.
        let ownership_token = Uuid::new_v4().to_string();
        let thread_id = thread_id.to_string();
        let worker_id = worker_id.to_string();
        let mut tx = self.pool.begin().await?;
        // Nothing to do if an output at least as new already exists.
        let existing_output = sqlx::query(
            r#"
            SELECT source_updated_at
            FROM stage1_outputs
            WHERE thread_id = ?
            "#,
        )
        .bind(thread_id.as_str())
        .fetch_optional(&mut *tx)
        .await?;
        if let Some(existing_output) = existing_output {
            let existing_source_updated_at: i64 = existing_output.try_get("source_updated_at")?;
            if existing_source_updated_at >= source_updated_at {
                tx.commit().await?;
                return Ok(Stage1JobClaimOutcome::SkippedUpToDate);
            }
        }
        let existing_job = sqlx::query(
            r#"
            SELECT status, lease_until, retry_at, retry_remaining
            FROM jobs
            WHERE kind = ? AND job_key = ?
            "#,
        )
        .bind(JOB_KIND_MEMORY_STAGE1)
        .bind(thread_id.as_str())
        .fetch_optional(&mut *tx)
        .await?;
        // No job row yet: create it directly in the running state.
        let Some(existing_job) = existing_job else {
            sqlx::query(
                r#"
                INSERT INTO jobs (
                    kind,
                    job_key,
                    status,
                    worker_id,
                    ownership_token,
                    started_at,
                    finished_at,
                    lease_until,
                    retry_at,
                    retry_remaining,
                    last_error,
                    input_watermark,
                    last_success_watermark
                ) VALUES (?, ?, 'running', ?, ?, ?, NULL, ?, NULL, ?, NULL, ?, NULL)
                "#,
            )
            .bind(JOB_KIND_MEMORY_STAGE1)
            .bind(thread_id.as_str())
            .bind(worker_id.as_str())
            .bind(ownership_token.as_str())
            .bind(now)
            .bind(lease_until)
            .bind(DEFAULT_RETRY_REMAINING)
            .bind(source_updated_at)
            .execute(&mut *tx)
            .await?;
            tx.commit().await?;
            return Ok(Stage1JobClaimOutcome::Claimed { ownership_token });
        };
        let status: String = existing_job.try_get("status")?;
        let existing_lease_until: Option<i64> = existing_job.try_get("lease_until")?;
        let retry_at: Option<i64> = existing_job.try_get("retry_at")?;
        let retry_remaining: i64 = existing_job.try_get("retry_remaining")?;
        if retry_remaining <= 0 {
            tx.commit().await?;
            return Ok(Stage1JobClaimOutcome::SkippedRetryExhausted);
        }
        if retry_at.is_some_and(|retry_at| retry_at > now) {
            tx.commit().await?;
            return Ok(Stage1JobClaimOutcome::SkippedRetryBackoff);
        }
        // A running job with a live lease belongs to another worker.
        if status == "running" && existing_lease_until.is_some_and(|lease_until| lease_until > now)
        {
            tx.commit().await?;
            return Ok(Stage1JobClaimOutcome::SkippedRunning);
        }
        // Atomic takeover: the WHERE clause re-checks the skip conditions so
        // the claim fails cleanly if the row changed since the read above.
        let rows_affected = sqlx::query(
            r#"
            UPDATE jobs
            SET
                status = 'running',
                worker_id = ?,
                ownership_token = ?,
                started_at = ?,
                finished_at = NULL,
                lease_until = ?,
                retry_at = NULL,
                last_error = NULL,
                input_watermark = ?
            WHERE kind = ? AND job_key = ?
              AND (status != 'running' OR lease_until IS NULL OR lease_until <= ?)
              AND (retry_at IS NULL OR retry_at <= ?)
              AND retry_remaining > 0
            "#,
        )
        .bind(worker_id.as_str())
        .bind(ownership_token.as_str())
        .bind(now)
        .bind(lease_until)
        .bind(source_updated_at)
        .bind(JOB_KIND_MEMORY_STAGE1)
        .bind(thread_id.as_str())
        .bind(now)
        .bind(now)
        .execute(&mut *tx)
        .await?
        .rows_affected();
        tx.commit().await?;
        if rows_affected == 0 {
            Ok(Stage1JobClaimOutcome::SkippedRunning)
        } else {
            Ok(Stage1JobClaimOutcome::Claimed { ownership_token })
        }
    }
    /// Persists a successful stage-1 result for the worker holding
    /// `ownership_token` and marks follow-up consolidation work as dirty.
    ///
    /// Returns `false` (discarding the result) when the token no longer owns
    /// a running job — e.g. the lease expired and another worker took over.
    /// On success the job is marked `done`, the stage-1 output is upserted
    /// (never overwriting a newer output), and both the per-cwd and the
    /// user-wide consolidation jobs are enqueued with `source_updated_at` as
    /// the new input watermark.
    pub async fn mark_stage1_job_succeeded(
        &self,
        thread_id: ThreadId,
        ownership_token: &str,
        source_updated_at: i64,
        raw_memory: &str,
        summary: &str,
    ) -> anyhow::Result<bool> {
        let now = Utc::now().timestamp();
        let thread_id = thread_id.to_string();
        let mut tx = self.pool.begin().await?;
        // Finish the job only if this worker still owns it.
        let rows_affected = sqlx::query(
            r#"
            UPDATE jobs
            SET
                status = 'done',
                finished_at = ?,
                lease_until = NULL,
                last_error = NULL,
                last_success_watermark = input_watermark
            WHERE kind = ? AND job_key = ?
              AND status = 'running' AND ownership_token = ?
            "#,
        )
        .bind(now)
        .bind(JOB_KIND_MEMORY_STAGE1)
        .bind(thread_id.as_str())
        .bind(ownership_token)
        .execute(&mut *tx)
        .await?
        .rows_affected();
        if rows_affected == 0 {
            // Ownership was lost; drop this result rather than clobbering
            // whatever the current owner produces.
            tx.commit().await?;
            return Ok(false);
        }
        // Upsert the output; the WHERE clause keeps the newest version.
        sqlx::query(
            r#"
            INSERT INTO stage1_outputs (
                thread_id,
                source_updated_at,
                raw_memory,
                summary,
                generated_at
            ) VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(thread_id) DO UPDATE SET
                source_updated_at = excluded.source_updated_at,
                raw_memory = excluded.raw_memory,
                summary = excluded.summary,
                generated_at = excluded.generated_at
            WHERE excluded.source_updated_at >= stage1_outputs.source_updated_at
            "#,
        )
        .bind(thread_id.as_str())
        .bind(source_updated_at)
        .bind(raw_memory)
        .bind(summary)
        .bind(now)
        .execute(&mut *tx)
        .await?;
        // Mark both consolidation scopes dirty so phase 2 picks them up.
        if let Some(thread_row) = sqlx::query(
            r#"
            SELECT cwd
            FROM threads
            WHERE id = ?
            "#,
        )
        .bind(thread_id.as_str())
        .fetch_optional(&mut *tx)
        .await?
        {
            let cwd: String = thread_row.try_get("cwd")?;
            // Canonicalize so threads in the same directory share one scope
            // key; fall back to the raw cwd when canonicalization fails.
            let normalized_cwd = normalize_cwd_for_scope_matching(&cwd)
                .unwrap_or_else(|| PathBuf::from(&cwd))
                .display()
                .to_string();
            enqueue_scope_consolidation_with_executor(
                &mut *tx,
                MEMORY_SCOPE_KIND_CWD,
                &normalized_cwd,
                source_updated_at,
            )
            .await?;
            enqueue_scope_consolidation_with_executor(
                &mut *tx,
                MEMORY_SCOPE_KIND_USER,
                MEMORY_SCOPE_KEY_USER,
                source_updated_at,
            )
            .await?;
        }
        tx.commit().await?;
        Ok(true)
    }
pub async fn mark_stage1_job_failed(
&self,
thread_id: ThreadId,
ownership_token: &str,
failure_reason: &str,
retry_delay_seconds: i64,
) -> anyhow::Result<bool> {
let now = Utc::now().timestamp();
let retry_at = now.saturating_add(retry_delay_seconds.max(0));
let thread_id = thread_id.to_string();
let rows_affected = sqlx::query(
r#"
UPDATE jobs
SET
status = 'error',
finished_at = ?,
lease_until = NULL,
retry_at = ?,
retry_remaining = retry_remaining - 1,
last_error = ?
WHERE kind = ? AND job_key = ?
AND status = 'running' AND ownership_token = ?
"#,
)
.bind(now)
.bind(retry_at)
.bind(failure_reason)
.bind(JOB_KIND_MEMORY_STAGE1)
.bind(thread_id.as_str())
.bind(ownership_token)
.execute(self.pool.as_ref())
.await?
.rows_affected();
Ok(rows_affected > 0)
}
pub async fn enqueue_scope_consolidation(
&self,
scope_kind: &str,
scope_key: &str,
input_watermark: i64,
) -> anyhow::Result<()> {
enqueue_scope_consolidation_with_executor(
self.pool.as_ref(),
scope_kind,
scope_key,
input_watermark,
)
.await
}
pub async fn list_pending_scope_consolidations(
&self,
limit: usize,
) -> anyhow::Result<Vec<PendingScopeConsolidation>> {
if limit == 0 {
return Ok(Vec::new());
}
let now = Utc::now().timestamp();
let rows = sqlx::query(
r#"
SELECT kind, job_key
FROM jobs
WHERE kind IN (?, ?)
AND input_watermark IS NOT NULL
AND input_watermark > COALESCE(last_success_watermark, 0)
AND retry_remaining > 0
AND (retry_at IS NULL OR retry_at <= ?)
AND (status != 'running' OR lease_until IS NULL OR lease_until <= ?)
ORDER BY input_watermark DESC, kind ASC, job_key ASC
LIMIT ?
"#,
)
.bind(JOB_KIND_MEMORY_CONSOLIDATE_CWD)
.bind(JOB_KIND_MEMORY_CONSOLIDATE_USER)
.bind(now)
.bind(now)
.bind(limit as i64)
.fetch_all(self.pool.as_ref())
.await?;
Ok(rows
.into_iter()
.filter_map(|row| {
let kind: String = row.try_get("kind").ok()?;
let scope_kind = scope_kind_for_job_kind(&kind)?;
let scope_key: String = row.try_get("job_key").ok()?;
Some(PendingScopeConsolidation {
scope_kind: scope_kind.to_string(),
scope_key,
})
})
.collect::<Vec<_>>())
}
    /// Try to claim a phase-2 consolidation job for `(scope_kind, scope_key)`.
    ///
    /// The claim succeeds only when the job row exists, is dirty (its input
    /// watermark is ahead of the last successful one), has retries left, is
    /// past any retry backoff, and no other worker holds an unexpired lease.
    /// On success the job is atomically moved to `running` under a fresh
    /// ownership token with a lease of `lease_seconds`.
    pub async fn try_claim_phase2_job(
        &self,
        scope_kind: &str,
        scope_key: &str,
        worker_id: ThreadId,
        lease_seconds: i64,
    ) -> anyhow::Result<Phase2JobClaimOutcome> {
        let Some(job_kind) = job_kind_for_scope(scope_kind) else {
            return Ok(Phase2JobClaimOutcome::SkippedNotDirty);
        };
        let now = Utc::now().timestamp();
        // Negative lease durations are clamped to zero.
        let lease_until = now.saturating_add(lease_seconds.max(0));
        let ownership_token = Uuid::new_v4().to_string();
        let worker_id = worker_id.to_string();
        let mut tx = self.pool.begin().await?;
        let existing_job = sqlx::query(
            r#"
            SELECT status, lease_until, retry_at, retry_remaining, input_watermark, last_success_watermark
            FROM jobs
            WHERE kind = ? AND job_key = ?
            "#,
        )
        .bind(job_kind)
        .bind(scope_key)
        .fetch_optional(&mut *tx)
        .await?;
        // Unlike stage-1 claims, a missing row is never created here: nothing
        // was enqueued for this scope, so there is nothing to consolidate.
        let Some(existing_job) = existing_job else {
            tx.commit().await?;
            return Ok(Phase2JobClaimOutcome::SkippedNotDirty);
        };
        let input_watermark: Option<i64> = existing_job.try_get("input_watermark")?;
        let input_watermark_value = input_watermark.unwrap_or(0);
        let last_success_watermark: Option<i64> = existing_job.try_get("last_success_watermark")?;
        // Not dirty: everything up to the input watermark already succeeded.
        if input_watermark_value <= last_success_watermark.unwrap_or(0) {
            tx.commit().await?;
            return Ok(Phase2JobClaimOutcome::SkippedNotDirty);
        }
        let status: String = existing_job.try_get("status")?;
        let existing_lease_until: Option<i64> = existing_job.try_get("lease_until")?;
        let retry_at: Option<i64> = existing_job.try_get("retry_at")?;
        let retry_remaining: i64 = existing_job.try_get("retry_remaining")?;
        if retry_remaining <= 0 {
            tx.commit().await?;
            return Ok(Phase2JobClaimOutcome::SkippedNotDirty);
        }
        if retry_at.is_some_and(|retry_at| retry_at > now) {
            tx.commit().await?;
            return Ok(Phase2JobClaimOutcome::SkippedNotDirty);
        }
        // A running job with a live lease belongs to another worker.
        if status == "running" && existing_lease_until.is_some_and(|lease_until| lease_until > now)
        {
            tx.commit().await?;
            return Ok(Phase2JobClaimOutcome::SkippedRunning);
        }
        // Atomic takeover: the WHERE clause re-checks the skip conditions so
        // the claim fails cleanly if the row changed since the read above.
        let rows_affected = sqlx::query(
            r#"
            UPDATE jobs
            SET
                status = 'running',
                worker_id = ?,
                ownership_token = ?,
                started_at = ?,
                finished_at = NULL,
                lease_until = ?,
                retry_at = NULL,
                last_error = NULL
            WHERE kind = ? AND job_key = ?
              AND (status != 'running' OR lease_until IS NULL OR lease_until <= ?)
              AND (retry_at IS NULL OR retry_at <= ?)
              AND retry_remaining > 0
            "#,
        )
        .bind(worker_id.as_str())
        .bind(ownership_token.as_str())
        .bind(now)
        .bind(lease_until)
        .bind(job_kind)
        .bind(scope_key)
        .bind(now)
        .bind(now)
        .execute(&mut *tx)
        .await?
        .rows_affected();
        tx.commit().await?;
        if rows_affected == 0 {
            Ok(Phase2JobClaimOutcome::SkippedRunning)
        } else {
            Ok(Phase2JobClaimOutcome::Claimed {
                ownership_token,
                input_watermark: input_watermark_value,
            })
        }
    }
pub async fn heartbeat_phase2_job(
&self,
scope_kind: &str,
scope_key: &str,
ownership_token: &str,
lease_seconds: i64,
) -> anyhow::Result<bool> {
let Some(job_kind) = job_kind_for_scope(scope_kind) else {
return Ok(false);
};
let now = Utc::now().timestamp();
let lease_until = now.saturating_add(lease_seconds.max(0));
let rows_affected = sqlx::query(
r#"
UPDATE jobs
SET lease_until = ?
WHERE kind = ? AND job_key = ?
AND status = 'running' AND ownership_token = ?
"#,
)
.bind(lease_until)
.bind(job_kind)
.bind(scope_key)
.bind(ownership_token)
.execute(self.pool.as_ref())
.await?
.rows_affected();
Ok(rows_affected > 0)
}
pub async fn mark_phase2_job_succeeded(
&self,
scope_kind: &str,
scope_key: &str,
ownership_token: &str,
completed_watermark: i64,
) -> anyhow::Result<bool> {
let Some(job_kind) = job_kind_for_scope(scope_kind) else {
return Ok(false);
};
let now = Utc::now().timestamp();
let rows_affected = sqlx::query(
r#"
UPDATE jobs
SET
status = 'done',
finished_at = ?,
lease_until = NULL,
last_error = NULL,
last_success_watermark = max(COALESCE(last_success_watermark, 0), ?)
WHERE kind = ? AND job_key = ?
AND status = 'running' AND ownership_token = ?
"#,
)
.bind(now)
.bind(completed_watermark)
.bind(job_kind)
.bind(scope_key)
.bind(ownership_token)
.execute(self.pool.as_ref())
.await?
.rows_affected();
Ok(rows_affected > 0)
}
pub async fn mark_phase2_job_failed(
&self,
scope_kind: &str,
scope_key: &str,
ownership_token: &str,
failure_reason: &str,
retry_delay_seconds: i64,
) -> anyhow::Result<bool> {
let Some(job_kind) = job_kind_for_scope(scope_kind) else {
return Ok(false);
};
let now = Utc::now().timestamp();
let retry_at = now.saturating_add(retry_delay_seconds.max(0));
let rows_affected = sqlx::query(
r#"
UPDATE jobs
SET
status = 'error',
finished_at = ?,
lease_until = NULL,
retry_at = ?,
retry_remaining = retry_remaining - 1,
last_error = ?
WHERE kind = ? AND job_key = ?
AND status = 'running' AND ownership_token = ?
"#,
)
.bind(now)
.bind(retry_at)
.bind(failure_reason)
.bind(job_kind)
.bind(scope_key)
.bind(ownership_token)
.execute(self.pool.as_ref())
.await?
.rows_affected();
Ok(rows_affected > 0)
}
}
/// Upserts a consolidation job for `(scope_kind, scope_key)` on the given
/// SQLite executor (pool or open transaction) and raises its input watermark.
///
/// A new row starts as `pending` with the full retry budget. On conflict the
/// row keeps `running` status (so an in-flight worker is undisturbed) but is
/// otherwise reset to `pending` with its backoff cleared; the retry budget is
/// topped back up to at least the default and the input watermark only ever
/// moves forward. Unknown scope kinds are silently ignored.
async fn enqueue_scope_consolidation_with_executor<'e, E>(
    executor: E,
    scope_kind: &str,
    scope_key: &str,
    input_watermark: i64,
) -> anyhow::Result<()>
where
    E: Executor<'e, Database = Sqlite>,
{
    let job_kind = match job_kind_for_scope(scope_kind) {
        Some(job_kind) => job_kind,
        None => return Ok(()),
    };
    let query = sqlx::query(
        r#"
        INSERT INTO jobs (
            kind,
            job_key,
            status,
            worker_id,
            ownership_token,
            started_at,
            finished_at,
            lease_until,
            retry_at,
            retry_remaining,
            last_error,
            input_watermark,
            last_success_watermark
        ) VALUES (?, ?, 'pending', NULL, NULL, NULL, NULL, NULL, NULL, ?, NULL, ?, 0)
        ON CONFLICT(kind, job_key) DO UPDATE SET
            status = CASE
                WHEN jobs.status = 'running' THEN 'running'
                ELSE 'pending'
            END,
            retry_at = CASE
                WHEN jobs.status = 'running' THEN jobs.retry_at
                ELSE NULL
            END,
            retry_remaining = max(jobs.retry_remaining, excluded.retry_remaining),
            input_watermark = max(COALESCE(jobs.input_watermark, 0), excluded.input_watermark)
        "#,
    )
    .bind(job_kind)
    .bind(scope_key)
    .bind(DEFAULT_RETRY_REMAINING)
    .bind(input_watermark);
    query.execute(executor).await?;
    Ok(())
}