diff --git a/codex-rs/core/src/memories/storage.rs b/codex-rs/core/src/memories/storage.rs index ea2b00acf..1a07dbe9b 100644 --- a/codex-rs/core/src/memories/storage.rs +++ b/codex-rs/core/src/memories/storage.rs @@ -82,11 +82,11 @@ async fn rebuild_raw_memories_file( ) .map_err(raw_memories_format_error)?; writeln!(body, "cwd: {}", memory.cwd.display()).map_err(raw_memories_format_error)?; - writeln!(body).map_err(raw_memories_format_error)?; let rollout_summary_file = format!("{}.md", rollout_summary_file_stem(memory)); - let raw_memory = - replace_rollout_summary_file_in_raw_memory(&memory.raw_memory, &rollout_summary_file); - body.push_str(raw_memory.trim()); + writeln!(body, "rollout_summary_file: {rollout_summary_file}") + .map_err(raw_memories_format_error)?; + writeln!(body).map_err(raw_memories_format_error)?; + body.push_str(memory.raw_memory.trim()); body.push_str("\n\n"); } @@ -161,26 +161,6 @@ fn rollout_summary_format_error(err: std::fmt::Error) -> std::io::Error { std::io::Error::other(format!("format rollout summary: {err}")) } -fn replace_rollout_summary_file_in_raw_memory( - raw_memory: &str, - rollout_summary_file: &str, -) -> String { - const ROLLOUT_SUMMARY_PREFIX: &str = "rollout_summary_file: "; - - let replacement = format!("rollout_summary_file: {rollout_summary_file}"); - raw_memory - .split('\n') - .map(|line| { - if line.starts_with(ROLLOUT_SUMMARY_PREFIX) { - replacement.as_str() - } else { - line - } - }) - .collect::>() - .join("\n") -} - pub(crate) fn rollout_summary_file_stem(memory: &Stage1Output) -> String { rollout_summary_file_stem_from_parts( memory.thread_id, @@ -270,7 +250,6 @@ pub(super) fn rollout_summary_file_stem_from_parts( #[cfg(test)] mod tests { - use super::replace_rollout_summary_file_in_raw_memory; use super::rollout_summary_file_stem; use super::rollout_summary_file_stem_from_parts; use chrono::TimeZone; @@ -339,71 +318,4 @@ mod tests { assert_eq!(rollout_summary_file_stem(&memory), FIXED_PREFIX); } - - #[test] - 
fn replace_rollout_summary_file_in_raw_memory_replaces_existing_value() { - let raw_memory = "\ ---- -rollout_summary_file: wrong.md -description: demo -keywords: one, two ---- -- body line"; - let normalized = replace_rollout_summary_file_in_raw_memory( - raw_memory, - "2025-01-01T00-00-00-abcd-demo.md", - ); - - assert_eq!( - normalized, - "\ ---- -rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md -description: demo -keywords: one, two ---- -- body line" - ); - } - - #[test] - fn replace_rollout_summary_file_in_raw_memory_replaces_placeholder() { - let raw_memory = "\ ---- -rollout_summary_file: -description: demo -keywords: one, two ---- -- body line"; - let normalized = replace_rollout_summary_file_in_raw_memory( - raw_memory, - "2025-01-01T00-00-00-abcd-demo.md", - ); - - assert_eq!( - normalized, - "\ ---- -rollout_summary_file: 2025-01-01T00-00-00-abcd-demo.md -description: demo -keywords: one, two ---- -- body line" - ); - } - - #[test] - fn replace_rollout_summary_file_in_raw_memory_leaves_text_without_field_unchanged() { - let raw_memory = "\ ---- -description: demo -keywords: one, two ---- -- body line"; - let normalized = replace_rollout_summary_file_in_raw_memory( - raw_memory, - "2025-01-01T00-00-00-abcd-demo.md", - ); - assert_eq!(normalized, raw_memory); - } } diff --git a/codex-rs/core/src/memories/tests.rs b/codex-rs/core/src/memories/tests.rs index 7d86f75c9..b47e7f8aa 100644 --- a/codex-rs/core/src/memories/tests.rs +++ b/codex-rs/core/src/memories/tests.rs @@ -127,6 +127,7 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only } files.sort_unstable(); assert_eq!(files.len(), 1); + let canonical_rollout_summary_file = &files[0]; let raw_memories = tokio::fs::read_to_string(raw_memories_file(&root)) .await @@ -134,6 +135,30 @@ async fn sync_rollout_summaries_and_raw_memories_file_keeps_latest_memories_only assert!(raw_memories.contains("raw memory")); assert!(raw_memories.contains(&keep_id)); 
assert!(raw_memories.contains("cwd: /tmp/workspace")); + assert!(raw_memories.contains(&format!( + "rollout_summary_file: {canonical_rollout_summary_file}" + ))); + let thread_header = format!("## Thread `{keep_id}`"); + let thread_pos = raw_memories + .find(&thread_header) + .expect("thread header should exist"); + let updated_pos = raw_memories[thread_pos..] + .find("updated_at: ") + .map(|offset| thread_pos + offset) + .expect("updated_at should exist after thread header"); + let cwd_pos = raw_memories[thread_pos..] + .find("cwd: /tmp/workspace") + .map(|offset| thread_pos + offset) + .expect("cwd should exist after thread header"); + let file_pos = raw_memories[thread_pos..] + .find(&format!( + "rollout_summary_file: {canonical_rollout_summary_file}" + )) + .map(|offset| thread_pos + offset) + .expect("rollout_summary_file should exist after thread header"); + assert!(thread_pos < updated_pos); + assert!(updated_pos < cwd_pos); + assert!(cwd_pos < file_pos); } #[tokio::test] @@ -229,7 +254,7 @@ async fn sync_rollout_summaries_uses_timestamp_hash_and_sanitized_slug_filename( } #[tokio::test] -async fn rebuild_raw_memories_file_rewrites_rollout_summary_file_to_canonical_filename() { +async fn rebuild_raw_memories_file_adds_canonical_rollout_summary_file_header() { let dir = tempdir().expect("tempdir"); let root = dir.path().join("memory"); ensure_layout(&root).await.expect("ensure layout"); @@ -241,11 +266,20 @@ async fn rebuild_raw_memories_file_rewrites_rollout_summary_file_to_canonical_fi source_updated_at: Utc.timestamp_opt(200, 0).single().expect("timestamp"), raw_memory: "\ --- -rollout_summary_file: state_migration_uniqueness_test.md description: Added a migration test keywords: codex-state, migrations --- -- Kept details." +### Task 1: migration-test +task: add-migration-test +task_group: codex-state +task_outcome: success +- Added regression coverage for migration uniqueness. 
+ +### Task 2: validate-migration +task: validate-migration-ordering +task_group: codex-state +task_outcome: success +- Confirmed no ordering regressions." .to_string(), rollout_summary: "short summary".to_string(), rollout_slug: Some("Unsafe Slug/With Spaces & Symbols + EXTRA_LONG_12345".to_string()), @@ -285,8 +319,11 @@ keywords: codex-state, migrations assert!(raw_memories.contains(&format!( "rollout_summary_file: {canonical_rollout_summary_file}" ))); - assert!(!raw_memories.contains("rollout_summary_file: state_migration_uniqueness_test.md")); assert!(raw_memories.contains("description: Added a migration test")); + assert!(raw_memories.contains("### Task 1: migration-test")); + assert!(raw_memories.contains("task: add-migration-test")); + assert!(raw_memories.contains("task_group: codex-state")); + assert!(raw_memories.contains("task_outcome: success")); } mod phase2 { diff --git a/codex-rs/core/templates/memories/consolidation.md b/codex-rs/core/templates/memories/consolidation.md index de0965082..91096c1dd 100644 --- a/codex-rs/core/templates/memories/consolidation.md +++ b/codex-rs/core/templates/memories/consolidation.md @@ -17,7 +17,8 @@ CONTEXT: MEMORY FOLDER STRUCTURE Folder structure (under {{ memory_root }}/): - memory_summary.md - - Always loaded into the system prompt. Must remain tiny and highly navigational. + - Always loaded into the system prompt. Must remain informative and highly navigational, + but still discriminative enough to guide retrieval. - MEMORY.md - Handbook entries. Used to grep for keywords; aggregated insights from rollouts; pointers to rollout summaries if certain past rollouts are very relevant. @@ -40,8 +41,10 @@ GLOBAL SAFETY, HYGIENE, AND NO-FILLER RULES (STRICT) - Evidence-based only: do not invent facts or claim verification that did not happen. - Redact secrets: never store tokens/keys/passwords; replace with [REDACTED_SECRET]. - Avoid copying large tool outputs. 
Prefer compact summaries + exact error snippets + pointers. -- **No-op is allowed and preferred** when there is no meaningful, reusable learning worth saving. - - If nothing is worth saving, make NO file changes. +- No-op content updates are allowed and preferred when there is no meaningful, reusable + learning worth saving. + - INIT mode: still create minimal required files (`MEMORY.md` and `memory_summary.md`). + - INCREMENTAL UPDATE mode: if nothing is worth saving, make no file changes. ============================================================ WHAT COUNTS AS HIGH-SIGNAL MEMORY @@ -97,7 +100,10 @@ Primary inputs (always read these, if exists): Under `{{ memory_root }}/`: - `raw_memories.md` - mechanical merge of `raw_memories` from Phase 1; - - source of rollout-level metadata needed for MEMORY.md header annotations; + - ordered latest-first; use this recency ordering as a major heuristic when choosing + what to promote, expand, or deprecate; + - source of rollout-level metadata needed for MEMORY.md `### rollout_summary_files` + annotations; you should be able to find `cwd` and `updated_at` there. - `MEMORY.md` - merged memories; produce a lightly clustered version if applicable @@ -123,45 +129,148 @@ Rules: - If there is no meaningful signal to add beyond what already exists, keep outputs minimal. - You should always make sure `MEMORY.md` and `memory_summary.md` exist and are up to date. - Follow the format and schema of the artifacts below. +- Do not target fixed counts (memory blocks, task groups, topics, or bullets). Let the + signal determine the granularity and depth. +- Quality objective: for high-signal task families, `MEMORY.md` should be materially more + useful than `raw_memories.md` while remaining easy to navigate. 
============================================================ 1) `MEMORY.md` FORMAT (STRICT) ============================================================ -Clustered schema: ---- -rollout_summary_files: - - () - - () -description: brief description of the shared tasks/outcomes -keywords: k1, k2, k3, ... ---- +`MEMORY.md` is the durable, retrieval-oriented handbook. Each block should be easy to grep +and rich enough to reuse without reopening raw rollout logs. + +Each memory block MUST start with: + +# Task Group: + +scope: + +- `Task Group` is for retrieval. Choose granularity based on memory density: + repo / project / workflow / detail-task family. +- `scope:` is for scanning. Keep it short and operational. + +Body format (strict): + +- Use the task-grouped markdown structure below (headings + bullets). Do not use a flat + bullet dump. +- The header (`# Task Group: ...` + `scope: ...`) is the index. The body contains + task-level detail. +- Every `## Task ` section MUST include task-local rollout files, task-local keywords, + and task-specific learnings. +- Use `-` bullets for lists and learnings. Do not use `*`. +- No bolding text in the memory body. + +Required task-oriented body shape (strict): + +## Task 1: + +task: + +### rollout_summary_files + +- (cwd=, updated_at=, ) + +### keywords + +- + +### learnings + +- +- +- +- cause -> fix> +- +- + +## Task 2: + +task: + +### rollout_summary_files -- - ... -Schema rules (strict): -- Keep entries compact and retrieval-friendly. -- A single note block may correspond to multiple related tasks; aggregate when tasks and lessons align. -- In `rollout_summary_files`, each parenthesized annotation must include - `cwd=` and `updated_at=` copied from that rollout summary metadata. - If missing from an individual rollout summary, recover them from `raw_memories.md`. -- If you need to reference skills, do it in the BODY as bullets, not in the header - (e.g., "- Related skill: skills//SKILL.md"). 
-- Use lowercase, hyphenated skill folder names. -- Preserve provenance: include the relevant rollout_summary_file(s) for the block. +### keywords -What to write in memory entries: Extract the highest-signal takeaways from the rollout -summaries, especially from "User preferences", "Reusable knowledge", "References", and -"Things that did not work / things that can be improved". -Write what would most help a future agent doing a similar (or adjacent) task: decision -triggers, key steps, proven commands/paths, and failure shields (symptom -> cause -> fix), -plus any stable user preferences. -If a rollout summary contains stable user profile details or preferences that generalize, -capture them here so they're easy to find and can be reflected in memory_summary.md. -The goal of MEMORY.md is to support related-but-not-identical future tasks, so keep -insights slightly more general; when a future task is very similar, expect the agent to -use the rollout summary for full detail. +- ... + +### learnings + +- + +... More `## Task ` sections if needed + +## General Tips + +- [Task 1] +- [Task 1][Task 2] +- + +Schema rules (strict): +- A) Structure and consistency + - Exact block shape: `# Task Group`, `scope:`, one or more `## Task `, and + `## General Tips`. + - Keep all tasks and tips inside the task family implied by the block header. + - Keep entries retrieval-friendly, but not shallow. + - Do not emit placeholder values (`task: task`, `# Task Group: misc`, `scope: general`, etc.). +- B) Task boundaries and clustering + - Primary organization unit is the task (`## Task `), not the rollout file. + - Default mapping: one coherent rollout summary -> one MEMORY block -> one `## Task 1`. + - If a rollout contains multiple distinct tasks, split them into multiple `## Task ` + sections. If those tasks belong to different task families, split into separate + MEMORY blocks (`# Task Group`). 
+ - A MEMORY block may include multiple rollouts only when they belong to the same + task group and the task intent, technical context, and outcome pattern align. + - A single `## Task ` section may cite multiple rollout summaries when they are + iterative attempts or follow-up runs for the same task. + - Do not cluster on keyword overlap alone. + - When in doubt, preserve boundaries (separate tasks/blocks) rather than over-cluster. +- C) Provenance and metadata + - Every `## Task ` section must include `### rollout_summary_files`, `### keywords`, + and `### learnings`. + - `### rollout_summary_files` must be task-local (not a block-wide catch-all list). + - Each rollout annotation must include `cwd=` and `updated_at=`. + If missing from a rollout summary, recover them from `raw_memories.md`. + - Major learnings should be traceable to rollout summaries listed in the same task section. + - Order rollout references by freshness and practical usefulness. +- D) Retrieval and references + - `task:` lines must be specific and searchable. + - `### keywords` should be discriminative and task-local (tool names, error strings, + repo concepts, APIs/contracts). + - Put task-specific detail in `## Task ` and only deduplicated cross-task guidance in + `## General Tips`. + - If you reference skills, do it in body bullets only (for example: + `- Related skill: skills//SKILL.md`). + - Use lowercase, hyphenated skill folder names. +- E) Ordering and conflict handling + - For grouped blocks, order `## Task ` sections by practical usefulness, then recency. + - Treat `updated_at` as a first-class signal: fresher validated evidence usually wins. + - If evidence conflicts and validation is unclear, preserve the uncertainty explicitly. + - In `## General Tips`, cite task references (`[Task 1]`, `[Task 2]`, etc.) when + merging, deduplicating, or resolving evidence. 
+ +What to write: +- Extract the takeaways from rollout summaries and raw_memories, especially sections like + "User preferences", "Reusable knowledge", "References", and "Things that did not work". +- Optimize for future related tasks: decision triggers, validated commands/paths, + verification steps, and failure shields (symptom -> cause -> fix). +- Capture stable user preferences/details that generalize so they can also inform + `memory_summary.md`. +- `MEMORY.md` should support related-but-not-identical tasks: slightly more general than a + rollout summary, but still operational and concrete. +- Use `raw_memories.md` as the routing layer; deep-dive into `rollout_summaries/*.md` when: + - the task is high-value and needs richer detail, + - multiple rollouts overlap and need conflict/staleness resolution, + - raw memory wording is too terse/ambiguous to consolidate confidently, + - you need stronger evidence, validation context, or user feedback. +- Each block should be useful on its own and materially richer than `memory_summary.md`: + - include concrete triggers, commands/paths, and failure shields, + - include outcome-specific notes (what worked, what failed, what remains uncertain), + - include scope boundaries / anti-drift notes when they affect future task success, + - include stale/conflict notes when newer evidence changes prior guidance. ============================================================ 2) `memory_summary.md` FORMAT (STRICT) @@ -210,17 +319,23 @@ For example, include (when known): ## What's in Memory This is a compact index to help future agents quickly find details in `MEMORY.md`, `skills/`, and `rollout_summaries/`. -Organize by topic. Each bullet should include: topic, keywords (used to search over -memory files), and a brief description. +Organize by topic. Each bullet must include: topic, keywords, and a clear description. Ordered by utility - which is the most likely to be useful for a future agent. +Do not target a fixed topic count. 
Cover the real high-signal areas and omit low-signal noise. +Prefer grouping by task family / workflow intent, not by incidental tools alone. Recommended format: - : , , , ... - - desc: + - desc: Notes: - Do not include large snippets; push details into MEMORY.md and rollout summaries. - Prefer topics/keywords that help a future agent search MEMORY.md efficiently. +- Prefer clear topic taxonomy over verbose drill-down pointers. +- Keep descriptions explicit enough that a future model can decide which keyword cluster + to search first for a new user query. +- Topic descriptions should mention what is inside, when to use it, and what kind of + outcome/procedure depth is available (for example: runbook, diagnostics, reporting, recovery). ============================================================ 3) `skills/` FORMAT (optional) @@ -303,29 +418,41 @@ WORKFLOW - create initial `skills/*` (optional but highly recommended) - write `memory_summary.md` last (highest-signal file) - Use your best efforts to get the most high-quality memory files - - Do not be lazy at browsing files at the INIT phase + - Do not be lazy at browsing files in INIT mode; deep-dive high-value rollouts and + conflicting task families until MEMORY blocks are richer and more useful than raw memories 3) INCREMENTAL UPDATE behavior: - Treat `raw_memories.md` as the primary source of NEW signal. - Read existing memory files first for continuity. 
- Integrate new signal into existing artifacts by: + - scanning new raw memories in recency order and identifying which existing blocks they should update - updating existing knowledge with better/newer evidence - updating stale or contradicting guidance + - expanding terse old blocks when new summaries/raw memories make the task family clearer - doing light clustering and merging if needed - updating existing skills or adding new skills only when there is clear new reusable procedure - update `memory_summary.md` last to reflect the final state of the memory folder -4) For both modes, update `MEMORY.md` after skill updates: - - add clear **Related skills** pointers in the BODY of corresponding note blocks (do - not change the YAML header schema) +4) Evidence deep-dive rule (both modes): + - `raw_memories.md` is the routing layer, not always the final authority for detail. + - When a task family is important, ambiguous, or duplicated across multiple rollouts, + open the relevant `rollout_summaries/*.md` files and extract richer procedural detail, + validation signals, and user feedback before finalizing `MEMORY.md`. + - Use `updated_at` and validation strength together to resolve stale/conflicting notes. 
-5) Housekeeping (optional): +5) For both modes, update `MEMORY.md` after skill updates: + - add clear related-skill pointers as plain bullets in the BODY of corresponding task + sections (do not change the `# Task Group` / `scope:` block header format) + +6) Housekeeping (optional): - remove clearly redundant/low-signal rollout summaries - if multiple summaries overlap for the same thread, keep the best one -6) Final pass: +7) Final pass: - remove duplication in memory_summary, skills/, and MEMORY.md - ensure any referenced skills/summaries actually exist + - ensure MEMORY blocks and "What's in Memory" use a consistent task-oriented taxonomy + - ensure recent important task families are easy to find (description + keywords + topic wording) - if there is no net-new or higher-quality signal to add, keep changes minimal (no churn for its own sake). @@ -341,6 +468,6 @@ Use `rg` for fast retrieval while consolidating: - Search durable notes: `rg -n -i "" "{{ memory_root }}/MEMORY.md"` - Search across memory tree: - `rg -n -i "" "{{ memory_root }}" | head -n 50` + `rg -n -i "" "{{ memory_root }}" | head -n 100` - Locate rollout summary files: - `rg --files "{{ memory_root }}/rollout_summaries" | head -n 200` + `rg --files "{{ memory_root }}/rollout_summaries" | head -n 400` diff --git a/codex-rs/core/templates/memories/read_path.md b/codex-rs/core/templates/memories/read_path.md index 6ef09247b..ec123fb55 100644 --- a/codex-rs/core/templates/memories/read_path.md +++ b/codex-rs/core/templates/memories/read_path.md @@ -25,8 +25,8 @@ again) - {{ base_path }}/rollout_summaries/ (per-rollout recaps + evidence snippets) Quick memory pass (when applicable): -1) Skim the MEMORY_SUMMARY included below and extract a few task-relevant -keywords (for example repo/module names, error strings, etc.). +1) Skim the MEMORY_SUMMARY included below and extract task-relevant topics and +keywords (for example repo/module names, workflows, error strings, etc.). 
2) Search {{ base_path }}/MEMORY.md for those keywords, and for any referenced rollout summary files and skills. 3) If relevant rollout summary files and skills exist, open matching files diff --git a/codex-rs/core/templates/memories/stage_one_system.md b/codex-rs/core/templates/memories/stage_one_system.md index 38031e8d2..692243c90 100644 --- a/codex-rs/core/templates/memories/stage_one_system.md +++ b/codex-rs/core/templates/memories/stage_one_system.md @@ -109,6 +109,16 @@ Typical real-world signals (use as examples when analyzing the rollout): 3) User keeps iterating on the same task: - Requests for fixes/revisions on the same artifact usually mean partial, not success. - Requesting a restart or pointing out contradictions often indicates fail. +4) Last task in the rollout: + - Treat the final task more conservatively than earlier tasks. + - If there is no explicit user feedback or environment validation for the final task, + prefer `uncertain` (or `partial` if there was obvious progress but no confirmation). + - For non-final tasks, switching to another task without unresolved blockers is a stronger + positive signal. + +Signal priority: +- Explicit user feedback and explicit environment/test/tool validation outrank all heuristics. +- If heuristic signals conflict with explicit feedback, follow explicit feedback. Fallback heuristics: - Success: explicit "done/works", tests pass, correct artifact produced, user @@ -152,6 +162,8 @@ This summary should be very comprehensive and detailed, because it will be furth distilled into MEMORY.md and memory_summary.md. There is no strict size limit, and you should feel free to list a lot of points here as long as they are helpful. +Do not target fixed counts (tasks, bullets, references, or topics). Let the rollout's +signal density decide how much to write. Instructional notes in angle brackets are guidance only; do not include them verbatim in the rollout summary. 
Template (items are flexible; include only what is useful): @@ -170,7 +182,7 @@ User preferences: -## Task : +## Task : Outcome: Key steps: @@ -188,9 +200,9 @@ Things that did not work / things that can be improved: user approval."> - ... -Reusable knowledge: +Reusable knowledge: - - : - [3] final verification evidence or explicit user feedback -## Task (if there are multiple tasks): +## Task (if there are multiple tasks): ... +Task section quality bar (strict): +- Each task section should be detailed enough that another agent can understand it without + reopening the raw rollout. +- For each task, cover the following when evidence exists (and state uncertainty when it + does not): + - what the user wanted / expected, + - what was attempted and what actually worked, + - what failed or remained uncertain and why, + - how the outcome was validated (user feedback, tests, tool output, or explicit lack of validation), + - reusable procedure/checklist and failure shields, + - concrete artifacts/commands/paths/error signatures that future agents can reuse. +- Do not be terse in task sections. Rich, evidence-backed task summaries are preferred + over compact summaries. + ============================================================ `raw_memory` FORMAT (STRICT) ============================================================ The schema is below. --- -rollout_summary_file: -description: brief description of the task and outcome +description: concise but information-dense description of the primary task(s), outcome, and highest-value takeaway +task: +task_group: +task_outcome: keywords: k1, k2, k3, ... --- -- + +Then write task-grouped body content (required): +### Task 1: +task: +task_group: +task_outcome: +- - ... +### Task 2: (if needed) +task: ... +task_group: ... +task_outcome: ... +- ... + +Preferred task-block body shape (strongly recommended): +- `### Task ` blocks should preserve task-specific retrieval signal and consolidation-ready detail. 
+- Within each task block, include bullets that explicitly cover (when applicable): + - user goal / expected outcome, + - what worked (key steps, commands, code paths, artifacts), + - what did not work or drifted (and what pivot worked), + - validation state (user confirmation, tests, runtime checks, or missing validation), + - reusable procedure/checklist and failure shields, + - high-signal evidence pointers (error strings, commands, files, IDs, URLs, etc.). +- Prefer labeled bullets when useful (for example: `- User goal: ...`, `- Validation: ...`, + `- Failure shield: ...`) so Phase 2 can retrieve and consolidate faster. + +Task grouping rules (strict): +- Every distinct user task in the thread must appear as its own `### Task ` block. +- Do not merge unrelated tasks into one block just because they happen in the same thread. +- If a thread contains only one task, keep exactly one task block. +- For each task block, keep the outcome tied to evidence relevant to that task. +- If a thread has partially related tasks, prefer splitting into separate task blocks and + linking them through shared keywords rather than merging. + What to write in memory entries: Extract useful takeaways from the rollout summaries, especially from "User preferences", "Reusable knowledge", "References", and "Things that did not work / things that can be improved". @@ -249,10 +309,17 @@ Write what would help a future agent doing a similar (or adjacent) task: decisio triggers, key steps, proven commands/paths, and failure shields (symptom -> cause -> fix), plus any stable user preferences. If a rollout summary contains stable user profile details or preferences that generalize, -capture them here so they're easy to find and can be reflected in memory_summary.md. +capture them here so they're easy to find without checking the rollout summary. 
The goal is to support related-but-not-identical future tasks, so keep insights slightly more general; when a future task is very similar, expect the agent to use the rollout summary for full detail. +For each task block, include enough detail to be useful for future agent reference: +- what the user wanted and expected, +- what was attempted and what actually worked, +- what failed or remained uncertain and why, +- what evidence validates the outcome (user feedback, environment/test feedback, or lack of both), +- reusable procedures/checklists and failure shields that should survive future similar tasks, +- artifacts and retrieval handles (commands, file paths, error strings, IDs) that make the task easy to rediscover. ============================================================ @@ -264,4 +331,6 @@ WORKFLOW 1) Triage outcome using the common rules. 2) Read the rollout carefully (do not miss user messages/tool calls/outputs). 3) Return `rollout_summary`, `rollout_slug`, and `raw_memory`, valid JSON only. - No markdown wrapper, no prose outside JSON. \ No newline at end of file + No markdown wrapper, no prose outside JSON. + +- Do not be terse in task sections. Include validation signal, failure mode, and reusable procedure per task when available.