memories: add rollout_summary_file header to raw memories and tune prompts (#12221)
## Summary
- Add `rollout_summary_file: <generated>.md` to each thread header in `raw_memories.md` so Phase 2 can reliably reference the canonical rollout summary filename.
- Update the memory prompts/templates (`stage_one_system`, `consolidation`, `read_path`) for the new task-oriented raw-memory / MEMORY.md schema and stronger consolidation guidance.

## Details
- `codex-rs/core/src/memories/storage.rs`
  - Writes the generated `rollout_summary_file` path into the per-thread metadata header when rebuilding `raw_memories.md`.
- `codex-rs/core/src/memories/tests.rs`
  - Verifies the canonical `rollout_summary_file` header is present and ordered after `updated_at`/`cwd` in `raw_memories.md`.
  - Verifies task-structured raw-memory content is preserved while the canonical header is added.
- `codex-rs/core/templates/memories/*.md`
  - Updates the stage-1 raw-memory format to task-grouped sections (`task`, `task_group`, `task_outcome`).
  - Updates Phase 2 consolidation guidance around recency (`updated_at`), task-oriented `MEMORY.md` blocks, and richer evidence-backed consolidation.
  - Tweaks the quick memory pass wording to emphasize topics/workflows in addition to keywords.

## Testing
- `cargo test -p codex-core memories`
This commit is contained in:
parent
18bd6d2d71
commit
e747a8eb74
5 changed files with 298 additions and 153 deletions
|
|
@ -82,11 +82,11 @@ async fn rebuild_raw_memories_file(
|
|||
)
|
||||
.map_err(raw_memories_format_error)?;
|
||||
writeln!(body, "cwd: {}", memory.cwd.display()).map_err(raw_memories_format_error)?;
|
||||
writeln!(body).map_err(raw_memories_format_error)?;
|
||||
let rollout_summary_file = format!("{}.md", rollout_summary_file_stem(memory));
|
||||
let raw_memory =
|
||||
replace_rollout_summary_file_in_raw_memory(&memory.raw_memory, &rollout_summary_file);
|
||||
body.push_str(raw_memory.trim());
|
||||
writeln!(body, "rollout_summary_file: {rollout_summary_file}")
|
||||
.map_err(raw_memories_format_error)?;
|
||||
writeln!(body).map_err(raw_memories_format_error)?;
|
||||
body.push_str(memory.raw_memory.trim());
|
||||
body.push_str("\n\n");
|
||||
}
|
||||
|
||||
|
|
@ -161,26 +161,6 @@ fn rollout_summary_format_error(err: std::fmt::Error) -> std::io::Error {
|
|||
std::io::Error::other(format!("format rollout summary: {err}"))
|
||||
}
|
||||
|
||||
fn replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory: &str,
|
||||
rollout_summary_file: &str,
|
||||
) -> String {
|
||||
const ROLLOUT_SUMMARY_PREFIX: &str = "rollout_summary_file: ";
|
||||
|
||||
let replacement = format!("rollout_summary_file: {rollout_summary_file}");
|
||||
raw_memory
|
||||
.split('\n')
|
||||
.map(|line| {
|
||||
if line.starts_with(ROLLOUT_SUMMARY_PREFIX) {
|
||||
replacement.as_str()
|
||||
} else {
|
||||
line
|
||||
}
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n")
|
||||
}
|
||||
|
||||
pub(crate) fn rollout_summary_file_stem(memory: &Stage1Output) -> String {
|
||||
rollout_summary_file_stem_from_parts(
|
||||
memory.thread_id,
|
||||
|
|
@ -270,7 +250,6 @@ pub(super) fn rollout_summary_file_stem_from_parts(
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::replace_rollout_summary_file_in_raw_memory;
|
||||
use super::rollout_summary_file_stem;
|
||||
use super::rollout_summary_file_stem_from_parts;
|
||||
use chrono::TimeZone;
|
||||
|
|
@ -339,71 +318,4 @@ mod tests {
|
|||
|
||||
assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_rollout_summary_file_in_raw_memory_replaces_existing_value() {
|
||||
let raw_memory = "\
|
||||
---
|
||||
rollout_summary_file: wrong.md
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line";
|
||||
let normalized = replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory,
|
||||
"2025-01-01T00-00-00-abcd-demo.md",
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
normalized,
|
||||
"\
|
||||
---
|
||||
rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_rollout_summary_file_in_raw_memory_replaces_placeholder() {
|
||||
let raw_memory = "\
|
||||
---
|
||||
rollout_summary_file: <system_populated_file.md>
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line";
|
||||
let normalized = replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory,
|
||||
"2025-01-01T00-00-00-abcd-demo.md",
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
normalized,
|
||||
"\
|
||||
---
|
||||
rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replace_rollout_summary_file_in_raw_memory_leaves_text_without_field_unchanged() {
|
||||
let raw_memory = "\
|
||||
---
|
||||
description: demo
|
||||
keywords: one, two
|
||||
---
|
||||
- body line";
|
||||
let normalized = replace_rollout_summary_file_in_raw_memory(
|
||||
raw_memory,
|
||||
"2025-01-01T00-00-00-abcd-demo.md",
|
||||
);
|
||||
assert_eq!(normalized, raw_memory);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -127,6 +127,7 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
|
|||
}
|
||||
files.sort_unstable();
|
||||
assert_eq!(files.len(), 1);
|
||||
let canonical_rollout_summary_file = &files[0];
|
||||
|
||||
let raw_memories = tokio::fs::read_to_string(raw_memories_file(&root))
|
||||
.await
|
||||
|
|
@ -134,6 +135,30 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only
|
|||
assert!(raw_memories.contains("raw memory"));
|
||||
assert!(raw_memories.contains(&keep_id));
|
||||
assert!(raw_memories.contains("cwd: /tmp/workspace"));
|
||||
assert!(raw_memories.contains(&format!(
|
||||
"rollout_summary_file: {canonical_rollout_summary_file}"
|
||||
)));
|
||||
let thread_header = format!("## Thread `{keep_id}`");
|
||||
let thread_pos = raw_memories
|
||||
.find(&thread_header)
|
||||
.expect("thread header should exist");
|
||||
let updated_pos = raw_memories[thread_pos..]
|
||||
.find("updated_at: ")
|
||||
.map(|offset| thread_pos + offset)
|
||||
.expect("updated_at should exist after thread header");
|
||||
let cwd_pos = raw_memories[thread_pos..]
|
||||
.find("cwd: /tmp/workspace")
|
||||
.map(|offset| thread_pos + offset)
|
||||
.expect("cwd should exist after thread header");
|
||||
let file_pos = raw_memories[thread_pos..]
|
||||
.find(&format!(
|
||||
"rollout_summary_file: {canonical_rollout_summary_file}"
|
||||
))
|
||||
.map(|offset| thread_pos + offset)
|
||||
.expect("rollout_summary_file should exist after thread header");
|
||||
assert!(thread_pos < updated_pos);
|
||||
assert!(updated_pos < cwd_pos);
|
||||
assert!(cwd_pos < file_pos);
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
|
|
@ -229,7 +254,7 @@ async fn sync_rollout_summaries_uses_timestamp_hash_and_sanitized_slug_filename(
|
|||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn rebuild_raw_memories_file_rewrites_rollout_summary_file_to_canonical_filename() {
|
||||
async fn rebuild_raw_memories_file_adds_canonical_rollout_summary_file_header() {
|
||||
let dir = tempdir().expect("tempdir");
|
||||
let root = dir.path().join("memory");
|
||||
ensure_layout(&root).await.expect("ensure layout");
|
||||
|
|
@ -241,11 +266,20 @@ async fn rebuild_raw_memories_file_rewrites_rollout_summary_file_to_canonical_fi
|
|||
source_updated_at: Utc.timestamp_opt(200, 0).single().expect("timestamp"),
|
||||
raw_memory: "\
|
||||
---
|
||||
rollout_summary_file: state_migration_uniqueness_test.md
|
||||
description: Added a migration test
|
||||
keywords: codex-state, migrations
|
||||
---
|
||||
- Kept details."
|
||||
### Task 1: migration-test
|
||||
task: add-migration-test
|
||||
task_group: codex-state
|
||||
task_outcome: success
|
||||
- Added regression coverage for migration uniqueness.
|
||||
|
||||
### Task 2: validate-migration
|
||||
task: validate-migration-ordering
|
||||
task_group: codex-state
|
||||
task_outcome: success
|
||||
- Confirmed no ordering regressions."
|
||||
.to_string(),
|
||||
rollout_summary: "short summary".to_string(),
|
||||
rollout_slug: Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345".to_string()),
|
||||
|
|
@ -285,8 +319,11 @@ keywords: codex-state, migrations
|
|||
assert!(raw_memories.contains(&format!(
|
||||
"rollout_summary_file: {canonical_rollout_summary_file}"
|
||||
)));
|
||||
assert!(!raw_memories.contains("rollout_summary_file: state_migration_uniqueness_test.md"));
|
||||
assert!(raw_memories.contains("description: Added a migration test"));
|
||||
assert!(raw_memories.contains("### Task 1: migration-test"));
|
||||
assert!(raw_memories.contains("task: add-migration-test"));
|
||||
assert!(raw_memories.contains("task_group: codex-state"));
|
||||
assert!(raw_memories.contains("task_outcome: success"));
|
||||
}
|
||||
|
||||
mod phase2 {
|
||||
|
|
|
|||
|
|
@ -17,7 +17,8 @@ CONTEXT: MEMORY FOLDER STRUCTURE
|
|||
|
||||
Folder structure (under {{ memory_root }}/):
|
||||
- memory_summary.md
|
||||
- Always loaded into the system prompt. Must remain tiny and highly navigational.
|
||||
- Always loaded into the system prompt. Must remain informative and highly navigational,
|
||||
but still discriminative enough to guide retrieval.
|
||||
- MEMORY.md
|
||||
- Handbook entries. Used to grep for keywords; aggregated insights from rollouts;
|
||||
pointers to rollout summaries if certain past rollouts are very relevant.
|
||||
|
|
@ -40,8 +41,10 @@ GLOBAL SAFETY, HYGIENE, AND NO-FILLER RULES (STRICT)
|
|||
- Evidence-based only: do not invent facts or claim verification that did not happen.
|
||||
- Redact secrets: never store tokens/keys/passwords; replace with [REDACTED_SECRET].
|
||||
- Avoid copying large tool outputs. Prefer compact summaries + exact error snippets + pointers.
|
||||
- **No-op is allowed and preferred** when there is no meaningful, reusable learning worth saving.
|
||||
- If nothing is worth saving, make NO file changes.
|
||||
- No-op content updates are allowed and preferred when there is no meaningful, reusable
|
||||
learning worth saving.
|
||||
- INIT mode: still create minimal required files (`MEMORY.md` and `memory_summary.md`).
|
||||
- INCREMENTAL UPDATE mode: if nothing is worth saving, make no file changes.
|
||||
|
||||
============================================================
|
||||
WHAT COUNTS AS HIGH-SIGNAL MEMORY
|
||||
|
|
@ -97,7 +100,10 @@ Primary inputs (always read these, if exists):
|
|||
Under `{{ memory_root }}/`:
|
||||
- `raw_memories.md`
|
||||
- mechanical merge of `raw_memories` from Phase 1;
|
||||
- source of rollout-level metadata needed for MEMORY.md header annotations;
|
||||
- ordered latest-first; use this recency ordering as a major heuristic when choosing
|
||||
what to promote, expand, or deprecate;
|
||||
- source of rollout-level metadata needed for MEMORY.md `### rollout_summary_files`
|
||||
annotations;
|
||||
you should be able to find `cwd` and `updated_at` there.
|
||||
- `MEMORY.md`
|
||||
- merged memories; produce a lightly clustered version if applicable
|
||||
|
|
@ -123,45 +129,148 @@ Rules:
|
|||
- If there is no meaningful signal to add beyond what already exists, keep outputs minimal.
|
||||
- You should always make sure `MEMORY.md` and `memory_summary.md` exist and are up to date.
|
||||
- Follow the format and schema of the artifacts below.
|
||||
- Do not target fixed counts (memory blocks, task groups, topics, or bullets). Let the
|
||||
signal determine the granularity and depth.
|
||||
- Quality objective: for high-signal task families, `MEMORY.md` should be materially more
|
||||
useful than `raw_memories.md` while remaining easy to navigate.
|
||||
|
||||
============================================================
|
||||
1) `MEMORY.md` FORMAT (STRICT)
|
||||
============================================================
|
||||
|
||||
Clustered schema:
|
||||
---
|
||||
rollout_summary_files:
|
||||
- <file1.md> (<annotation that includes status/usefulness, cwd, and updated_at, e.g. "success, most useful architecture walkthrough, cwd=/repo/path, updated_at=2026-02-12T10:30:00Z">)
|
||||
- <file2.md> (<annotation with cwd=/..., updated_at=...>)
|
||||
description: brief description of the shared tasks/outcomes
|
||||
keywords: k1, k2, k3, ... <searchable handles (tool names, error names, repo concepts, contracts)>
|
||||
---
|
||||
`MEMORY.md` is the durable, retrieval-oriented handbook. Each block should be easy to grep
|
||||
and rich enough to reuse without reopening raw rollout logs.
|
||||
|
||||
Each memory block MUST start with:
|
||||
|
||||
# Task Group: <repo / project / workflow / detail-task family; broad but distinguishable>
|
||||
|
||||
scope: <what this block covers, when to use it, and notable boundaries>
|
||||
|
||||
- `Task Group` is for retrieval. Choose granularity based on memory density:
|
||||
repo / project / workflow / detail-task family.
|
||||
- `scope:` is for scanning. Keep it short and operational.
|
||||
|
||||
Body format (strict):
|
||||
|
||||
- Use the task-grouped markdown structure below (headings + bullets). Do not use a flat
|
||||
bullet dump.
|
||||
- The header (`# Task Group: ...` + `scope: ...`) is the index. The body contains
|
||||
task-level detail.
|
||||
- Every `## Task <n>` section MUST include task-local rollout files, task-local keywords,
|
||||
and task-specific learnings.
|
||||
- Use `-` bullets for lists and learnings. Do not use `*`.
|
||||
- No bolding text in the memory body.
|
||||
|
||||
Required task-oriented body shape (strict):
|
||||
|
||||
## Task 1: <task description, outcome>
|
||||
|
||||
task: <specific, searchable task signature; avoid fluff>
|
||||
|
||||
### rollout_summary_files
|
||||
|
||||
- <rollout_summaries/file1.md> (cwd=<path>, updated_at=<timestamp>, <optional status/usefulness note>)
|
||||
|
||||
### keywords
|
||||
|
||||
- <task-local retrieval handles: tool names, error strings, repo concepts, APIs/contracts>
|
||||
|
||||
### learnings
|
||||
|
||||
- <task-specific learnings>
|
||||
- <user expectation, preference, style, tone, feedback>
|
||||
- <what worked, what failed, validation, reusable procedure, etc.>
|
||||
- <failure shields: symptom -> cause -> fix>
|
||||
- <scope boundaries / anti-drift notes when relevant>
|
||||
- <uncertainty explicitly preserved if unresolved>
|
||||
|
||||
## Task 2: <task description, outcome>
|
||||
|
||||
task: <specific, searchable task signature; avoid fluff>
|
||||
|
||||
### rollout_summary_files
|
||||
|
||||
- <Structured memory entries. Use bullets. No bolding text.>
|
||||
- ...
|
||||
|
||||
Schema rules (strict):
|
||||
- Keep entries compact and retrieval-friendly.
|
||||
- A single note block may correspond to multiple related tasks; aggregate when tasks and lessons align.
|
||||
- In `rollout_summary_files`, each parenthesized annotation must include
|
||||
`cwd=<path>` and `updated_at=<timestamp>` copied from that rollout summary metadata.
|
||||
If missing from an individual rollout summary, recover them from `raw_memories.md`.
|
||||
- If you need to reference skills, do it in the BODY as bullets, not in the header
|
||||
(e.g., "- Related skill: skills/<skill-name>/SKILL.md").
|
||||
- Use lowercase, hyphenated skill folder names.
|
||||
- Preserve provenance: include the relevant rollout_summary_file(s) for the block.
|
||||
### keywords
|
||||
|
||||
What to write in memory entries: Extract the highest-signal takeaways from the rollout
|
||||
summaries, especially from "User preferences", "Reusable knowledge", "References", and
|
||||
"Things that did not work / things that can be improved".
|
||||
Write what would most help a future agent doing a similar (or adjacent) task: decision
|
||||
triggers, key steps, proven commands/paths, and failure shields (symptom -> cause -> fix),
|
||||
plus any stable user preferences.
|
||||
If a rollout summary contains stable user profile details or preferences that generalize,
|
||||
capture them here so they're easy to find and can be reflected in memory_summary.md.
|
||||
The goal of MEMORY.md is to support related-but-not-identical future tasks, so keep
|
||||
insights slightly more general; when a future task is very similar, expect the agent to
|
||||
use the rollout summary for full detail.
|
||||
- ...
|
||||
|
||||
### learnings
|
||||
|
||||
- <task-specific memories / learnings>
|
||||
|
||||
... More `## Task <n>` sections if needed
|
||||
|
||||
## General Tips
|
||||
|
||||
- <cross-task guidance, deduplicated and generalized> [Task 1]
|
||||
- <conflict/staleness resolution note using task references> [Task 1][Task 2]
|
||||
- <structured memory bullets; no bolding>
|
||||
|
||||
Schema rules (strict):
|
||||
- A) Structure and consistency
|
||||
- Exact block shape: `# Task Group`, `scope:`, one or more `## Task <n>`, and
|
||||
`## General Tips`.
|
||||
- Keep all tasks and tips inside the task family implied by the block header.
|
||||
- Keep entries retrieval-friendly, but not shallow.
|
||||
- Do not emit placeholder values (`task: task`, `# Task Group: misc`, `scope: general`, etc.).
|
||||
- B) Task boundaries and clustering
|
||||
- Primary organization unit is the task (`## Task <n>`), not the rollout file.
|
||||
- Default mapping: one coherent rollout summary -> one MEMORY block -> one `## Task 1`.
|
||||
- If a rollout contains multiple distinct tasks, split them into multiple `## Task <n>`
|
||||
sections. If those tasks belong to different task families, split into separate
|
||||
MEMORY blocks (`# Task Group`).
|
||||
- A MEMORY block may include multiple rollouts only when they belong to the same
|
||||
task group and the task intent, technical context, and outcome pattern align.
|
||||
- A single `## Task <n>` section may cite multiple rollout summaries when they are
|
||||
iterative attempts or follow-up runs for the same task.
|
||||
- Do not cluster on keyword overlap alone.
|
||||
- When in doubt, preserve boundaries (separate tasks/blocks) rather than over-cluster.
|
||||
- C) Provenance and metadata
|
||||
- Every `## Task <n>` section must include `### rollout_summary_files`, `### keywords`,
|
||||
and `### learnings`.
|
||||
- `### rollout_summary_files` must be task-local (not a block-wide catch-all list).
|
||||
- Each rollout annotation must include `cwd=<path>` and `updated_at=<timestamp>`.
|
||||
If missing from a rollout summary, recover them from `raw_memories.md`.
|
||||
- Major learnings should be traceable to rollout summaries listed in the same task section.
|
||||
- Order rollout references by freshness and practical usefulness.
|
||||
- D) Retrieval and references
|
||||
- `task:` lines must be specific and searchable.
|
||||
- `### keywords` should be discriminative and task-local (tool names, error strings,
|
||||
repo concepts, APIs/contracts).
|
||||
- Put task-specific detail in `## Task <n>` and only deduplicated cross-task guidance in
|
||||
`## General Tips`.
|
||||
- If you reference skills, do it in body bullets only (for example:
|
||||
`- Related skill: skills/<skill-name>/SKILL.md`).
|
||||
- Use lowercase, hyphenated skill folder names.
|
||||
- E) Ordering and conflict handling
|
||||
- For grouped blocks, order `## Task <n>` sections by practical usefulness, then recency.
|
||||
- Treat `updated_at` as a first-class signal: fresher validated evidence usually wins.
|
||||
- If evidence conflicts and validation is unclear, preserve the uncertainty explicitly.
|
||||
- In `## General Tips`, cite task references (`[Task 1]`, `[Task 2]`, etc.) when
|
||||
merging, deduplicating, or resolving evidence.
|
||||
|
||||
What to write:
|
||||
- Extract the takeaways from rollout summaries and raw_memories, especially sections like
|
||||
"User preferences", "Reusable knowledge", "References", and "Things that did not work".
|
||||
- Optimize for future related tasks: decision triggers, validated commands/paths,
|
||||
verification steps, and failure shields (symptom -> cause -> fix).
|
||||
- Capture stable user preferences/details that generalize so they can also inform
|
||||
`memory_summary.md`.
|
||||
- `MEMORY.md` should support related-but-not-identical tasks: slightly more general than a
|
||||
rollout summary, but still operational and concrete.
|
||||
- Use `raw_memories.md` as the routing layer; deep-dive into `rollout_summaries/*.md` when:
|
||||
- the task is high-value and needs richer detail,
|
||||
- multiple rollouts overlap and need conflict/staleness resolution,
|
||||
- raw memory wording is too terse/ambiguous to consolidate confidently,
|
||||
- you need stronger evidence, validation context, or user feedback.
|
||||
- Each block should be useful on its own and materially richer than `memory_summary.md`:
|
||||
- include concrete triggers, commands/paths, and failure shields,
|
||||
- include outcome-specific notes (what worked, what failed, what remains uncertain),
|
||||
- include scope boundaries / anti-drift notes when they affect future task success,
|
||||
- include stale/conflict notes when newer evidence changes prior guidance.
|
||||
|
||||
============================================================
|
||||
2) `memory_summary.md` FORMAT (STRICT)
|
||||
|
|
@ -210,17 +319,23 @@ For example, include (when known):
|
|||
## What's in Memory
|
||||
This is a compact index to help future agents quickly find details in `MEMORY.md`,
|
||||
`skills/`, and `rollout_summaries/`.
|
||||
Organize by topic. Each bullet should include: topic, keywords (used to search over
|
||||
memory files), and a brief description.
|
||||
Organize by topic. Each bullet must include: topic, keywords, and a clear description.
|
||||
Order by utility: list first the topics most likely to be useful to a future agent.
|
||||
Do not target a fixed topic count. Cover the real high-signal areas and omit low-signal noise.
|
||||
Prefer grouping by task family / workflow intent, not by incidental tools alone.
|
||||
|
||||
Recommended format:
|
||||
- <topic>: <keyword1>, <keyword2>, <keyword3>, ...
|
||||
- desc: <brief description>
|
||||
- desc: <clear and specific description of what is inside this topic and when to use it>
|
||||
|
||||
Notes:
|
||||
- Do not include large snippets; push details into MEMORY.md and rollout summaries.
|
||||
- Prefer topics/keywords that help a future agent search MEMORY.md efficiently.
|
||||
- Prefer clear topic taxonomy over verbose drill-down pointers.
|
||||
- Keep descriptions explicit enough that a future model can decide which keyword cluster
|
||||
to search first for a new user query.
|
||||
- Topic descriptions should mention what is inside, when to use it, and what kind of
|
||||
outcome/procedure depth is available (for example: runbook, diagnostics, reporting, recovery).
|
||||
|
||||
============================================================
|
||||
3) `skills/` FORMAT (optional)
|
||||
|
|
@ -303,29 +418,41 @@ WORKFLOW
|
|||
- create initial `skills/*` (optional but highly recommended)
|
||||
- write `memory_summary.md` last (highest-signal file)
|
||||
- Make your best effort to produce the highest-quality memory files
|
||||
- Do not be lazy at browsing files at the INIT phase
|
||||
- Do not be lazy at browsing files in INIT mode; deep-dive high-value rollouts and
|
||||
conflicting task families until MEMORY blocks are richer and more useful than raw memories
|
||||
|
||||
3) INCREMENTAL UPDATE behavior:
|
||||
- Treat `raw_memories.md` as the primary source of NEW signal.
|
||||
- Read existing memory files first for continuity.
|
||||
- Integrate new signal into existing artifacts by:
|
||||
- scanning new raw memories in recency order and identifying which existing blocks they should update
|
||||
- updating existing knowledge with better/newer evidence
|
||||
- updating stale or contradicting guidance
|
||||
- expanding terse old blocks when new summaries/raw memories make the task family clearer
|
||||
- doing light clustering and merging if needed
|
||||
- updating existing skills or adding new skills only when there is clear new reusable procedure
|
||||
- update `memory_summary.md` last to reflect the final state of the memory folder
|
||||
|
||||
4) For both modes, update `MEMORY.md` after skill updates:
|
||||
- add clear **Related skills** pointers in the BODY of corresponding note blocks (do
|
||||
not change the YAML header schema)
|
||||
4) Evidence deep-dive rule (both modes):
|
||||
- `raw_memories.md` is the routing layer, not always the final authority for detail.
|
||||
- When a task family is important, ambiguous, or duplicated across multiple rollouts,
|
||||
open the relevant `rollout_summaries/*.md` files and extract richer procedural detail,
|
||||
validation signals, and user feedback before finalizing `MEMORY.md`.
|
||||
- Use `updated_at` and validation strength together to resolve stale/conflicting notes.
|
||||
|
||||
5) Housekeeping (optional):
|
||||
5) For both modes, update `MEMORY.md` after skill updates:
|
||||
- add clear related-skill pointers as plain bullets in the BODY of corresponding task
|
||||
sections (do not change the `# Task Group` / `scope:` block header format)
|
||||
|
||||
6) Housekeeping (optional):
|
||||
- remove clearly redundant/low-signal rollout summaries
|
||||
- if multiple summaries overlap for the same thread, keep the best one
|
||||
|
||||
6) Final pass:
|
||||
7) Final pass:
|
||||
- remove duplication in memory_summary, skills/, and MEMORY.md
|
||||
- ensure any referenced skills/summaries actually exist
|
||||
- ensure MEMORY blocks and "What's in Memory" use a consistent task-oriented taxonomy
|
||||
- ensure recent important task families are easy to find (description + keywords + topic wording)
|
||||
- if there is no net-new or higher-quality signal to add, keep changes minimal (no
|
||||
churn for its own sake).
|
||||
|
||||
|
|
@ -341,6 +468,6 @@ Use `rg` for fast retrieval while consolidating:
|
|||
- Search durable notes:
|
||||
`rg -n -i "<pattern>" "{{ memory_root }}/MEMORY.md"`
|
||||
- Search across memory tree:
|
||||
`rg -n -i "<pattern>" "{{ memory_root }}" | head -n 50`
|
||||
`rg -n -i "<pattern>" "{{ memory_root }}" | head -n 100`
|
||||
- Locate rollout summary files:
|
||||
`rg --files "{{ memory_root }}/rollout_summaries" | head -n 200`
|
||||
`rg --files "{{ memory_root }}/rollout_summaries" | head -n 400`
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@ again)
|
|||
- {{ base_path }}/rollout_summaries/ (per-rollout recaps + evidence snippets)
|
||||
|
||||
Quick memory pass (when applicable):
|
||||
1) Skim the MEMORY_SUMMARY included below and extract a few task-relevant
|
||||
keywords (for example repo/module names, error strings, etc.).
|
||||
1) Skim the MEMORY_SUMMARY included below and extract task-relevant topics and
|
||||
keywords (for example repo/module names, workflows, error strings, etc.).
|
||||
2) Search {{ base_path }}/MEMORY.md for those keywords, and for any referenced
|
||||
rollout summary files and skills.
|
||||
3) If relevant rollout summary files and skills exist, open matching files
|
||||
|
|
|
|||
|
|
@ -109,6 +109,16 @@ Typical real-world signals (use as examples when analyzing the rollout):
|
|||
3) User keeps iterating on the same task:
|
||||
- Requests for fixes/revisions on the same artifact usually mean partial, not success.
|
||||
- Requesting a restart or pointing out contradictions often indicates fail.
|
||||
4) Last task in the rollout:
|
||||
- Treat the final task more conservatively than earlier tasks.
|
||||
- If there is no explicit user feedback or environment validation for the final task,
|
||||
prefer `uncertain` (or `partial` if there was obvious progress but no confirmation).
|
||||
- For non-final tasks, switching to another task without unresolved blockers is a stronger
|
||||
positive signal.
|
||||
|
||||
Signal priority:
|
||||
- Explicit user feedback and explicit environment/test/tool validation outrank all heuristics.
|
||||
- If heuristic signals conflict with explicit feedback, follow explicit feedback.
|
||||
|
||||
Fallback heuristics:
|
||||
- Success: explicit "done/works", tests pass, correct artifact produced, user
|
||||
|
|
@ -152,6 +162,8 @@ This summary should be very comprehensive and detailed, because it will be furth
|
|||
distilled into MEMORY.md and memory_summary.md.
|
||||
There is no strict size limit, and you should feel free to list a lot of points here as
|
||||
long as they are helpful.
|
||||
Do not target fixed counts (tasks, bullets, references, or topics). Let the rollout's
|
||||
signal density decide how much to write.
|
||||
Instructional notes in angle brackets are guidance only; do not include them verbatim in the rollout summary.
|
||||
|
||||
Template (items are flexible; include only what is useful):
|
||||
|
|
@ -170,7 +182,7 @@ User preferences: <explicit or inferred from user messages; include how you infe
|
|||
|
||||
<Then followed by tasks in this rollout. Each task is a section; sections below are optional per task.>
|
||||
|
||||
## Task <idx>: <short task name>
|
||||
## Task <idx>: <task name>
|
||||
Outcome: <success|partial|fail|uncertain>
|
||||
|
||||
Key steps:
|
||||
|
|
@ -188,9 +200,9 @@ Things that did not work / things that can be improved:
|
|||
user approval.">
|
||||
- ...
|
||||
|
||||
Reusable knowledge: <you are encouraged to list 3-10 points for each task here, anything
|
||||
helpful counts, stick to facts. Don't put opinions or suggestions from the assistant
|
||||
that are not validated by the user.>
|
||||
Reusable knowledge: <list as many durable, evidence-backed points as needed for this task.
|
||||
Anything helpful counts; stick to facts. Don't put vague opinions or suggestions from the
|
||||
assistant that are not validated.>
|
||||
- <facts that will be helpful for future agents, such as how the system works, anything
|
||||
that took the agent some effort to figure out, user preferences, etc.>
|
||||
- <e.g. "When running evals, you should pass in the flag `some flag
|
||||
|
|
@ -226,22 +238,70 @@ shows or why it matters>:
|
|||
- [3] final verification evidence or explicit user feedback
|
||||
|
||||
|
||||
## Task <idx> (if there are multiple tasks): <short task name>
|
||||
## Task <idx> (if there are multiple tasks): <task name>
|
||||
...
|
||||
|
||||
Task section quality bar (strict):
|
||||
- Each task section should be detailed enough that another agent can understand it without
|
||||
reopening the raw rollout.
|
||||
- For each task, cover the following when evidence exists (and state uncertainty when it
|
||||
does not):
|
||||
- what the user wanted / expected,
|
||||
- what was attempted and what actually worked,
|
||||
- what failed or remained uncertain and why,
|
||||
- how the outcome was validated (user feedback, tests, tool output, or explicit lack of validation),
|
||||
- reusable procedure/checklist and failure shields,
|
||||
- concrete artifacts/commands/paths/error signatures that future agents can reuse.
|
||||
- Do not be terse in task sections. Rich, evidence-backed task summaries are preferred
|
||||
over compact summaries.
|
||||
|
||||
============================================================
|
||||
`raw_memory` FORMAT (STRICT)
|
||||
============================================================
|
||||
|
||||
The schema is below.
|
||||
---
|
||||
rollout_summary_file: <file.md>
|
||||
description: brief description of the task and outcome
|
||||
description: concise but information-dense description of the primary task(s), outcome, and highest-value takeaway
|
||||
task: <primary_task_signature>
|
||||
task_group: <repo_or_workflow_bucket>
|
||||
task_outcome: <success|partial|fail|uncertain>
|
||||
keywords: k1, k2, k3, ... <searchable handles (tool names, error names, repo concepts, contracts)>
|
||||
---
|
||||
- <Structured memory entries. Use bullets. Do not bold text.>
|
||||
|
||||
Then write task-grouped body content (required):
|
||||
### Task 1: <short task name>
|
||||
task: <task signature for this task>
|
||||
task_group: <project/workflow topic>
|
||||
task_outcome: <success|partial|fail|uncertain>
|
||||
- <useful memory bullet>
|
||||
- ...
|
||||
|
||||
### Task 2: <short task name> (if needed)
|
||||
task: ...
|
||||
task_group: ...
|
||||
task_outcome: ...
|
||||
- ...
|
||||
|
||||
Preferred task-block body shape (strongly recommended):
|
||||
- `### Task <n>` blocks should preserve task-specific retrieval signal and consolidation-ready detail.
|
||||
- Within each task block, include bullets that explicitly cover (when applicable):
|
||||
- user goal / expected outcome,
|
||||
- what worked (key steps, commands, code paths, artifacts),
|
||||
- what did not work or drifted (and what pivot worked),
|
||||
- validation state (user confirmation, tests, runtime checks, or missing validation),
|
||||
- reusable procedure/checklist and failure shields,
|
||||
- high-signal evidence pointers (error strings, commands, files, IDs, URLs, etc.).
|
||||
- Prefer labeled bullets when useful (for example: `- User goal: ...`, `- Validation: ...`,
|
||||
`- Failure shield: ...`) so Phase 2 can retrieve and consolidate faster.
|
||||
|
||||
Task grouping rules (strict):
|
||||
- Every distinct user task in the thread must appear as its own `### Task <n>` block.
|
||||
- Do not merge unrelated tasks into one block just because they happen in the same thread.
|
||||
- If a thread contains only one task, keep exactly one task block.
|
||||
- For each task block, keep the outcome tied to evidence relevant to that task.
|
||||
- If a thread has partially related tasks, prefer splitting into separate task blocks and
|
||||
linking them through shared keywords rather than merging.
|
||||
|
||||
What to write in memory entries: Extract useful takeaways from the rollout summaries,
|
||||
especially from "User preferences", "Reusable knowledge", "References", and
|
||||
"Things that did not work / things that can be improved".
|
||||
|
|
@ -249,10 +309,17 @@ Write what would help a future agent doing a similar (or adjacent) task: decisio
|
|||
triggers, key steps, proven commands/paths, and failure shields (symptom -> cause -> fix),
|
||||
plus any stable user preferences.
|
||||
If a rollout summary contains stable user profile details or preferences that generalize,
|
||||
capture them here so they're easy to find and can be reflected in memory_summary.md.
|
||||
capture them here so they're easy to find without checking the rollout summary.
|
||||
The goal is to support related-but-not-identical future tasks, so keep
|
||||
insights slightly more general; when a future task is very similar, expect the agent to
|
||||
use the rollout summary for full detail.
|
||||
For each task block, include enough detail to be useful as a reference for future agents:
|
||||
- what the user wanted and expected,
|
||||
- what was attempted and what actually worked,
|
||||
- what failed or remained uncertain and why,
|
||||
- what evidence validates the outcome (user feedback, environment/test feedback, or lack of both),
|
||||
- reusable procedures/checklists and failure shields that should survive future similar tasks,
|
||||
- artifacts and retrieval handles (commands, file paths, error strings, IDs) that make the task easy to rediscover.
|
||||
|
||||
|
||||
============================================================
|
||||
|
|
@ -264,4 +331,6 @@ WORKFLOW
|
|||
1) Triage outcome using the common rules.
|
||||
2) Read the rollout carefully (do not miss user messages/tool calls/outputs).
|
||||
3) Return `rollout_summary`, `rollout_slug`, and `raw_memory`, valid JSON only.
|
||||
No markdown wrapper, no prose outside JSON.
|
||||
No markdown wrapper, no prose outside JSON.
|
||||
|
||||
- Do not be terse in task sections. Include validation signal, failure mode, and reusable procedure per task when available.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue