Add cwd to memory files (#11591)

Add cwd to memory files so that the model can better handle memories that span multiple working directories (cwds). --------- Co-authored-by: jif-oai <jif@openai.com>
2026-02-12 09:46:49 -08:00 · 2026-02-12 09:46:49 -08:00 · 88c5ca2573
commit 88c5ca2573
parent 82acd815e4
7 changed files with 29 additions and 3 deletions
--- a/codex-rs/core/src/memories/dispatch.rs
+++ b/codex-rs/core/src/memories/dispatch.rs
@ -284,6 +284,7 @@ mod tests {
    use codex_state::Stage1Output;
    use codex_state::ThreadMetadataBuilder;
    use pretty_assertions::assert_eq;
+    use std::path::PathBuf;
    use std::sync::Arc;
    use tempfile::TempDir;

@ -402,6 +403,7 @@ mod tests {
                .expect("valid source_updated_at timestamp"),
            raw_memory: "raw memory".to_string(),
            rollout_summary: "rollout summary".to_string(),
+            cwd: PathBuf::from("/tmp/workspace"),
            generated_at: chrono::DateTime::<Utc>::from_timestamp(124, 0)
                .expect("valid generated_at timestamp"),
        };
--- a/codex-rs/core/src/memories/storage.rs
+++ b/codex-rs/core/src/memories/storage.rs
@ -84,6 +84,8 @@ async fn rebuild_raw_memories_file(root: &Path, memories: &[Stage1Output]) -> st
            memory.source_updated_at.to_rfc3339()
        )
        .map_err(|err| std::io::Error::other(format!("format raw memories: {err}")))?;
+        writeln!(body, "cwd: {}", memory.cwd.display())
+            .map_err(|err| std::io::Error::other(format!("format raw memories: {err}")))?;
        writeln!(body)
            .map_err(|err| std::io::Error::other(format!("format raw memories: {err}")))?;
        body.push_str(memory.raw_memory.trim());
@ -138,6 +140,8 @@ async fn write_rollout_summary_for_thread(
        memory.source_updated_at.to_rfc3339()
    )
    .map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
+    writeln!(body, "cwd: {}", memory.cwd.display())
+        .map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
    writeln!(body)
        .map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
    body.push_str(&memory.rollout_summary);
--- a/codex-rs/core/src/memories/tests.rs
+++ b/codex-rs/core/src/memories/tests.rs
@ -10,6 +10,7 @@ use codex_protocol::ThreadId;
 use codex_state::Stage1Output;
 use pretty_assertions::assert_eq;
 use serde_json::Value;
+use std::path::PathBuf;
 use tempfile::tempdir;

 #[test]
@ -65,6 +66,7 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
        source_updated_at: Utc.timestamp_opt(100, 0).single().expect("timestamp"),
        raw_memory: "raw memory".to_string(),
        rollout_summary: "short summary".to_string(),
+        cwd: PathBuf::from("/tmp/workspace"),
        generated_at: Utc.timestamp_opt(101, 0).single().expect("timestamp"),
    }];

@ -83,4 +85,5 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
        .expect("read raw memories");
    assert!(raw_memories.contains("raw memory"));
    assert!(raw_memories.contains(&keep_id));
+    assert!(raw_memories.contains("cwd: /tmp/workspace"));
 }
--- a/codex-rs/core/templates/memories/consolidation.md
+++ b/codex-rs/core/templates/memories/consolidation.md
@ -97,6 +97,8 @@ Primary inputs (always read these, if exists):
 Under `{{ memory_root }}/`:
 - `raw_memories.md`
  - mechanical merge of `raw_memories` from Phase 1;
+  - source of rollout-level metadata needed for MEMORY.md header annotations;
+    you should be able to find `cwd` and `updated_at` there.
 - `MEMORY.md`
  - merged memories; produce a lightly clustered version if applicable
 - `rollout_summaries/*.md`
@ -129,8 +131,8 @@ Rules:
 Clustered schema:
 ---
 rollout_summary_files:
-  - <file1.md> (<a few words annotation such as "success, most useful" or "uncertain, no user feedback">)
-  - <file2.md> (<annotation>)
+  - <file1.md> (<annotation that includes status/usefulness, cwd, and updated_at, e.g. "success, most useful architecture walkthrough, cwd=/repo/path, updated_at=2026-02-12T10:30:00Z">)
+  - <file2.md> (<annotation with cwd=/..., updated_at=...>)
 description: brief description of the shared tasks/outcomes
 keywords: k1, k2, k3, ... <searchable handles (tool names, error names, repo concepts, contracts)>
 ---
@ -141,6 +143,9 @@ keywords: k1, k2, k3, ... <searchable handles (tool names, error names, repo con
 Schema rules (strict):
 - Keep entries compact and retrieval-friendly.
 - A single note block may correspond to multiple related tasks; aggregate when tasks and lessons align.
+- In `rollout_summary_files`, each parenthesized annotation must include
+  `cwd=<path>` and `updated_at=<timestamp>` copied from that rollout summary's metadata.
+  If either is missing from an individual rollout summary, recover it from `raw_memories.md`.
 - If you need to reference skills, do it in the BODY as bullets, not in the header
  (e.g., "- Related skill: skills/<skill-name>/SKILL.md").
 - Use lowercase, hyphenated skill folder names.
@ -338,4 +343,4 @@ Use `rg` for fast retrieval while consolidating:
 - Search across memory tree:
  `rg -n -i "<pattern>" "{{ memory_root }}" | head -n 50`
 - Locate rollout summary files:
-  `rg --files "{{ memory_root }}/rollout_summaries" | head -n 200`
+  `rg --files "{{ memory_root }}/rollout_summaries" | head -n 200`
--- a/codex-rs/state/src/model/memories.rs
+++ b/codex-rs/state/src/model/memories.rs
@ -4,6 +4,7 @@ use chrono::Utc;
 use codex_protocol::ThreadId;
 use sqlx::Row;
 use sqlx::sqlite::SqliteRow;
+use std::path::PathBuf;

 use super::ThreadMetadata;

@ -14,6 +15,7 @@ pub struct Stage1Output {
    pub source_updated_at: DateTime<Utc>,
    pub raw_memory: String,
    pub rollout_summary: String,
+    pub cwd: PathBuf,
    pub generated_at: DateTime<Utc>,
 }

@ -23,6 +25,7 @@ pub(crate) struct Stage1OutputRow {
    source_updated_at: i64,
    raw_memory: String,
    rollout_summary: String,
+    cwd: String,
    generated_at: i64,
 }

@ -33,6 +36,7 @@ impl Stage1OutputRow {
            source_updated_at: row.try_get("source_updated_at")?,
            raw_memory: row.try_get("raw_memory")?,
            rollout_summary: row.try_get("rollout_summary")?,
+            cwd: row.try_get("cwd")?,
            generated_at: row.try_get("generated_at")?,
        })
    }
@ -47,6 +51,7 @@ impl TryFrom<Stage1OutputRow> for Stage1Output {
            source_updated_at: epoch_seconds_to_datetime(row.source_updated_at)?,
            raw_memory: row.raw_memory,
            rollout_summary: row.rollout_summary,
+            cwd: PathBuf::from(row.cwd),
            generated_at: epoch_seconds_to_datetime(row.generated_at)?,
        })
    }
--- a/codex-rs/state/src/runtime.rs
+++ b/codex-rs/state/src/runtime.rs
@ -2093,8 +2093,10 @@ WHERE kind = 'memory_stage1'
        assert_eq!(outputs.len(), 2);
        assert_eq!(outputs[0].thread_id, thread_id_b);
        assert_eq!(outputs[0].rollout_summary, "summary b");
+        assert_eq!(outputs[0].cwd, codex_home.join("workspace-b"));
        assert_eq!(outputs[1].thread_id, thread_id_a);
        assert_eq!(outputs[1].rollout_summary, "summary a");
+        assert_eq!(outputs[1].cwd, codex_home.join("workspace-a"));

        let _ = tokio::fs::remove_dir_all(codex_home).await;
    }
@ -2163,6 +2165,7 @@ VALUES (?, ?, ?, ?, ?)
        assert_eq!(outputs.len(), 1);
        assert_eq!(outputs[0].thread_id, thread_id_non_empty);
        assert_eq!(outputs[0].rollout_summary, "summary");
+        assert_eq!(outputs[0].cwd, codex_home.join("workspace-non-empty"));

        let _ = tokio::fs::remove_dir_all(codex_home).await;
    }
--- a/codex-rs/state/src/runtime/memories.rs
+++ b/codex-rs/state/src/runtime/memories.rs
@ -178,6 +178,7 @@ LEFT JOIN jobs
    ///
    /// Query behavior:
    /// - filters out rows where both `raw_memory` and `rollout_summary` are blank
+    /// - joins `threads` to include thread `cwd`
    /// - orders by `source_updated_at DESC, thread_id DESC`
    /// - applies `LIMIT n`
    pub async fn list_stage1_outputs_for_global(
@ -191,7 +192,10 @@ LEFT JOIN jobs
        let rows = sqlx::query(
            r#"
 SELECT so.thread_id, so.source_updated_at, so.raw_memory, so.rollout_summary, so.generated_at
+     , COALESCE(t.cwd, '') AS cwd
 FROM stage1_outputs AS so
+LEFT JOIN threads AS t
+    ON t.id = so.thread_id
 WHERE length(trim(so.raw_memory)) > 0 OR length(trim(so.rollout_summary)) > 0
 ORDER BY so.source_updated_at DESC, so.thread_id DESC
 LIMIT ?