Add cwd to memory files (#11591)

Add cwd to memory files so that the model can better handle memories
that span multiple working directories.

---------

Co-authored-by: jif-oai <jif@openai.com>
This commit is contained in:
Wendy Jiao 2026-02-12 09:46:49 -08:00 committed by GitHub
parent 82acd815e4
commit 88c5ca2573
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 29 additions and 3 deletions

View file

@ -284,6 +284,7 @@ mod tests {
use codex_state::Stage1Output;
use codex_state::ThreadMetadataBuilder;
use pretty_assertions::assert_eq;
use std::path::PathBuf;
use std::sync::Arc;
use tempfile::TempDir;
@ -402,6 +403,7 @@ mod tests {
.expect("valid source_updated_at timestamp"),
raw_memory: "raw memory".to_string(),
rollout_summary: "rollout summary".to_string(),
cwd: PathBuf::from("/tmp/workspace"),
generated_at: chrono::DateTime::<Utc>::from_timestamp(124, 0)
.expect("valid generated_at timestamp"),
};

View file

@ -84,6 +84,8 @@ async fn rebuild_raw_memories_file(root: &Path, memories: &[Stage1Output]) -> st
memory.source_updated_at.to_rfc3339()
)
.map_err(|err| std::io::Error::other(format!("format raw memories: {err}")))?;
writeln!(body, "cwd: {}", memory.cwd.display())
.map_err(|err| std::io::Error::other(format!("format raw memories: {err}")))?;
writeln!(body)
.map_err(|err| std::io::Error::other(format!("format raw memories: {err}")))?;
body.push_str(memory.raw_memory.trim());
@ -138,6 +140,8 @@ async fn write_rollout_summary_for_thread(
memory.source_updated_at.to_rfc3339()
)
.map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
writeln!(body, "cwd: {}", memory.cwd.display())
.map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
writeln!(body)
.map_err(|err| std::io::Error::other(format!("format rollout summary: {err}")))?;
body.push_str(&memory.rollout_summary);

View file

@ -10,6 +10,7 @@ use codex_protocol::ThreadId;
use codex_state::Stage1Output;
use pretty_assertions::assert_eq;
use serde_json::Value;
use std::path::PathBuf;
use tempfile::tempdir;
#[test]
@ -65,6 +66,7 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
source_updated_at: Utc.timestamp_opt(100, 0).single().expect("timestamp"),
raw_memory: "raw memory".to_string(),
rollout_summary: "short summary".to_string(),
cwd: PathBuf::from("/tmp/workspace"),
generated_at: Utc.timestamp_opt(101, 0).single().expect("timestamp"),
}];
@ -83,4 +85,5 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
.expect("read raw memories");
assert!(raw_memories.contains("raw memory"));
assert!(raw_memories.contains(&keep_id));
assert!(raw_memories.contains("cwd: /tmp/workspace"));
}

View file

@ -97,6 +97,8 @@ Primary inputs (always read these, if exists):
Under `{{ memory_root }}/`:
- `raw_memories.md`
- mechanical merge of `raw_memories` from Phase 1;
- source of rollout-level metadata needed for MEMORY.md header annotations;
you should be able to find `cwd` and `updated_at` there.
- `MEMORY.md`
- merged memories; produce a lightly clustered version if applicable
- `rollout_summaries/*.md`
@ -129,8 +131,8 @@ Rules:
Clustered schema:
---
rollout_summary_files:
- <file1.md> (<a few words annotation such as "success, most useful" or "uncertain, no user feedback">)
- <file2.md> (<annotation>)
- <file1.md> (<annotation that includes status/usefulness, cwd, and updated_at, e.g. "success, most useful architecture walkthrough, cwd=/repo/path, updated_at=2026-02-12T10:30:00Z">)
- <file2.md> (<annotation with cwd=/..., updated_at=...>)
description: brief description of the shared tasks/outcomes
keywords: k1, k2, k3, ... <searchable handles (tool names, error names, repo concepts, contracts)>
---
@ -141,6 +143,9 @@ keywords: k1, k2, k3, ... <searchable handles (tool names, error names, repo con
Schema rules (strict):
- Keep entries compact and retrieval-friendly.
- A single note block may correspond to multiple related tasks; aggregate when tasks and lessons align.
- In `rollout_summary_files`, each parenthesized annotation must include
`cwd=<path>` and `updated_at=<timestamp>` copied from that rollout summary's metadata.
If either is missing from an individual rollout summary, recover it from `raw_memories.md`.
- If you need to reference skills, do it in the BODY as bullets, not in the header
(e.g., "- Related skill: skills/<skill-name>/SKILL.md").
- Use lowercase, hyphenated skill folder names.
@ -338,4 +343,4 @@ Use `rg` for fast retrieval while consolidating:
- Search across memory tree:
`rg -n -i "<pattern>" "{{ memory_root }}" | head -n 50`
- Locate rollout summary files:
`rg --files "{{ memory_root }}/rollout_summaries" | head -n 200`
`rg --files "{{ memory_root }}/rollout_summaries" | head -n 200`

View file

@ -4,6 +4,7 @@ use chrono::Utc;
use codex_protocol::ThreadId;
use sqlx::Row;
use sqlx::sqlite::SqliteRow;
use std::path::PathBuf;
use super::ThreadMetadata;
@ -14,6 +15,7 @@ pub struct Stage1Output {
pub source_updated_at: DateTime<Utc>,
pub raw_memory: String,
pub rollout_summary: String,
pub cwd: PathBuf,
pub generated_at: DateTime<Utc>,
}
@ -23,6 +25,7 @@ pub(crate) struct Stage1OutputRow {
source_updated_at: i64,
raw_memory: String,
rollout_summary: String,
cwd: String,
generated_at: i64,
}
@ -33,6 +36,7 @@ impl Stage1OutputRow {
source_updated_at: row.try_get("source_updated_at")?,
raw_memory: row.try_get("raw_memory")?,
rollout_summary: row.try_get("rollout_summary")?,
cwd: row.try_get("cwd")?,
generated_at: row.try_get("generated_at")?,
})
}
@ -47,6 +51,7 @@ impl TryFrom<Stage1OutputRow> for Stage1Output {
source_updated_at: epoch_seconds_to_datetime(row.source_updated_at)?,
raw_memory: row.raw_memory,
rollout_summary: row.rollout_summary,
cwd: PathBuf::from(row.cwd),
generated_at: epoch_seconds_to_datetime(row.generated_at)?,
})
}

View file

@ -2093,8 +2093,10 @@ WHERE kind = 'memory_stage1'
assert_eq!(outputs.len(), 2);
assert_eq!(outputs[0].thread_id, thread_id_b);
assert_eq!(outputs[0].rollout_summary, "summary b");
assert_eq!(outputs[0].cwd, codex_home.join("workspace-b"));
assert_eq!(outputs[1].thread_id, thread_id_a);
assert_eq!(outputs[1].rollout_summary, "summary a");
assert_eq!(outputs[1].cwd, codex_home.join("workspace-a"));
let _ = tokio::fs::remove_dir_all(codex_home).await;
}
@ -2163,6 +2165,7 @@ VALUES (?, ?, ?, ?, ?)
assert_eq!(outputs.len(), 1);
assert_eq!(outputs[0].thread_id, thread_id_non_empty);
assert_eq!(outputs[0].rollout_summary, "summary");
assert_eq!(outputs[0].cwd, codex_home.join("workspace-non-empty"));
let _ = tokio::fs::remove_dir_all(codex_home).await;
}

View file

@ -178,6 +178,7 @@ LEFT JOIN jobs
///
/// Query behavior:
/// - filters out rows where both `raw_memory` and `rollout_summary` are blank
/// - left-joins `threads` to include each thread's `cwd` (empty string when no matching thread row exists)
/// - orders by `source_updated_at DESC, thread_id DESC`
/// - applies `LIMIT n`
pub async fn list_stage1_outputs_for_global(
@ -191,7 +192,10 @@ LEFT JOIN jobs
let rows = sqlx::query(
r#"
SELECT so.thread_id, so.source_updated_at, so.raw_memory, so.rollout_summary, so.generated_at
, COALESCE(t.cwd, '') AS cwd
FROM stage1_outputs AS so
LEFT JOIN threads AS t
ON t.id = so.thread_id
WHERE length(trim(so.raw_memory)) > 0 OR length(trim(so.rollout_summary)) > 0
ORDER BY so.source_updated_at DESC, so.thread_id DESC
LIMIT ?