From c16f9daaaf50de0bf516a56dfd4b95b322d57fa3 Mon Sep 17 00:00:00 2001
From: Charley Cunningham
Date: Tue, 17 Feb 2026 22:30:29 -0800
Subject: [PATCH] Add model-visible context layout snapshot tests (#12073)

## Summary
- add a dedicated `core/tests/suite/model_visible_layout.rs` snapshot suite to materialize model-visible request layout in high-value scenarios
- add four reviewer-focused snapshot scenarios:
  - turn-level context updates (cwd / permissions / personality)
  - cwd change to a directory with a different AGENTS.md (current behavior does not emit refreshed AGENTS instructions)
  - first post-resume turn with model hydration + personality change
  - first post-resume turn where pre-turn model override matches rollout model
- wire the new suite into `core/tests/suite/mod.rs`
- commit generated `insta` snapshots under `core/tests/suite/snapshots/`

## Why
This creates a stable, reviewable baseline of model-visible context layout against `main` before follow-on context-management refactors. It lets subsequent PRs show focused snapshot diffs for behavior changes instead of introducing the test surface and behavior changes at once.

## Testing - `just fmt` - `INSTA_UPDATE=always cargo test -p codex-core model_visible_layout` --- codex-rs/core/tests/suite/mod.rs | 1 + .../core/tests/suite/model_visible_layout.rs | 443 ++++++++++++++++++ ...ut_cwd_change_does_not_refresh_agents.snap | 24 + ...resume_override_matches_rollout_model.snap | 22 + ...layout_resume_with_personality_change.snap | 26 + ...__model_visible_layout_turn_overrides.snap | 24 + 6 files changed, 540 insertions(+) create mode 100644 codex-rs/core/tests/suite/model_visible_layout.rs create mode 100644 codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_cwd_change_does_not_refresh_agents.snap create mode 100644 codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_override_matches_rollout_model.snap create mode 100644 codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_with_personality_change.snap create mode 100644 codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_turn_overrides.snap diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs index 03eec5bde..e420d12f9 100644 --- a/codex-rs/core/tests/suite/mod.rs +++ b/codex-rs/core/tests/suite/mod.rs @@ -85,6 +85,7 @@ mod model_info_overrides; mod model_overrides; mod model_switching; mod model_tools; +mod model_visible_layout; mod models_cache_ttl; mod models_etag_responses; mod otel; diff --git a/codex-rs/core/tests/suite/model_visible_layout.rs b/codex-rs/core/tests/suite/model_visible_layout.rs new file mode 100644 index 000000000..eda1b8915 --- /dev/null +++ b/codex-rs/core/tests/suite/model_visible_layout.rs @@ -0,0 +1,443 @@ +#![allow(clippy::expect_used)] + +use std::fs; +use std::sync::Arc; + +use anyhow::Result; +use codex_core::config::types::Personality; +use codex_core::features::Feature; +use codex_core::protocol::AskForApproval; +use codex_core::protocol::EventMsg; +use 
codex_core::protocol::Op; +use codex_core::protocol::SandboxPolicy; +use codex_protocol::config_types::ReasoningSummary; +use codex_protocol::user_input::UserInput; +use core_test_support::context_snapshot; +use core_test_support::context_snapshot::ContextSnapshotOptions; +use core_test_support::context_snapshot::ContextSnapshotRenderMode; +use core_test_support::responses::ResponsesRequest; +use core_test_support::responses::ev_assistant_message; +use core_test_support::responses::ev_completed; +use core_test_support::responses::ev_response_created; +use core_test_support::responses::mount_sse_once; +use core_test_support::responses::mount_sse_sequence; +use core_test_support::responses::sse; +use core_test_support::responses::start_mock_server; +use core_test_support::skip_if_no_network; +use core_test_support::test_codex::test_codex; +use core_test_support::wait_for_event; + +const PRETURN_CONTEXT_DIFF_CWD: &str = "PRETURN_CONTEXT_DIFF_CWD"; + +fn context_snapshot_options() -> ContextSnapshotOptions { + ContextSnapshotOptions::default() + .render_mode(ContextSnapshotRenderMode::KindWithTextPrefix { max_chars: 96 }) +} + +fn format_labeled_requests_snapshot( + scenario: &str, + sections: &[(&str, &ResponsesRequest)], +) -> String { + context_snapshot::format_labeled_requests_snapshot( + scenario, + sections, + &context_snapshot_options(), + ) +} + +fn agents_message_count(request: &ResponsesRequest) -> usize { + request + .message_input_texts("user") + .iter() + .filter(|text| text.starts_with("# AGENTS.md instructions for ")) + .count() +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn snapshot_model_visible_layout_turn_overrides() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let responses = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_assistant_message("msg-1", "turn one complete"), + ev_completed("resp-1"), + ]), + sse(vec![ + 
ev_response_created("resp-2"), + ev_assistant_message("msg-2", "turn two complete"), + ev_completed("resp-2"), + ]), + ], + ) + .await; + + let mut builder = test_codex() + .with_model("gpt-5.2-codex") + .with_config(|config| { + config.features.enable(Feature::Personality); + config.personality = Some(Personality::Pragmatic); + }); + let test = builder.build(&server).await?; + let preturn_context_diff_cwd = test.cwd_path().join(PRETURN_CONTEXT_DIFF_CWD); + fs::create_dir_all(&preturn_context_diff_cwd)?; + + test.codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "first turn".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: test.cwd_path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: test.session_configured.model.clone(), + effort: test.config.model_reasoning_effort, + summary: ReasoningSummary::Auto, + collaboration_mode: None, + personality: None, + }) + .await?; + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + test.codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "second turn with context updates".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: preturn_context_diff_cwd, + approval_policy: AskForApproval::OnRequest, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: test.session_configured.model.clone(), + effort: test.config.model_reasoning_effort, + summary: ReasoningSummary::Auto, + collaboration_mode: None, + personality: Some(Personality::Friendly), + }) + .await?; + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + let requests = responses.requests(); + assert_eq!(requests.len(), 2, "expected two requests"); + insta::assert_snapshot!( + "model_visible_layout_turn_overrides", + format_labeled_requests_snapshot( + "Second turn changes cwd, approval 
policy, and personality while keeping model constant.", + &[ + ("First Request (Baseline)", &requests[0]), + ("Second Request (Turn Overrides)", &requests[1]), + ] + ) + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +// TODO(ccunningham): Diff `user_instructions` and emit updates when AGENTS.md content changes +// (for example after cwd changes), then update this test to assert refreshed AGENTS content. +async fn snapshot_model_visible_layout_cwd_change_does_not_refresh_agents() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let responses = mount_sse_sequence( + &server, + vec![ + sse(vec![ + ev_response_created("resp-1"), + ev_assistant_message("msg-1", "turn one complete"), + ev_completed("resp-1"), + ]), + sse(vec![ + ev_response_created("resp-2"), + ev_assistant_message("msg-2", "turn two complete"), + ev_completed("resp-2"), + ]), + ], + ) + .await; + + let mut builder = test_codex().with_model("gpt-5.2-codex"); + let test = builder.build(&server).await?; + let cwd_one = test.cwd_path().join("agents_one"); + let cwd_two = test.cwd_path().join("agents_two"); + fs::create_dir_all(&cwd_one)?; + fs::create_dir_all(&cwd_two)?; + fs::write( + cwd_one.join("AGENTS.md"), + "# AGENTS one\n\n\nTurn one agents instructions.\n\n", + )?; + fs::write( + cwd_two.join("AGENTS.md"), + "# AGENTS two\n\n\nTurn two agents instructions.\n\n", + )?; + + test.codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "first turn in agents_one".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: cwd_one.clone(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: test.session_configured.model.clone(), + effort: test.config.model_reasoning_effort, + summary: ReasoningSummary::Auto, + collaboration_mode: None, + personality: None, + }) + .await?; + wait_for_event(&test.codex, |event| { + matches!(event, 
EventMsg::TurnComplete(_)) + }) + .await; + + test.codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "second turn in agents_two".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: cwd_two, + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: test.session_configured.model.clone(), + effort: test.config.model_reasoning_effort, + summary: ReasoningSummary::Auto, + collaboration_mode: None, + personality: None, + }) + .await?; + wait_for_event(&test.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + let requests = responses.requests(); + assert_eq!(requests.len(), 2, "expected two requests"); + assert_eq!( + agents_message_count(&requests[0]), + 1, + "expected exactly one AGENTS message in first request" + ); + assert_eq!( + agents_message_count(&requests[1]), + 1, + "expected AGENTS to refresh after cwd change, but current behavior only keeps history AGENTS" + ); + insta::assert_snapshot!( + "model_visible_layout_cwd_change_does_not_refresh_agents", + format_labeled_requests_snapshot( + "Second turn changes cwd to a directory with different AGENTS.md; current behavior does not emit refreshed AGENTS instructions.", + &[ + ("First Request (agents_one)", &requests[0]), + ("Second Request (agents_two cwd)", &requests[1]), + ] + ) + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn snapshot_model_visible_layout_resume_with_personality_change() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut initial_builder = test_codex().with_config(|config| { + config.model = Some("gpt-5.2".to_string()); + }); + let initial = initial_builder.build(&server).await?; + let codex = Arc::clone(&initial.codex); + let home = initial.home.clone(); + let rollout_path = initial + .session_configured + .rollout_path + .clone() + .expect("rollout path"); + + let 
initial_mock = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-initial"), + ev_assistant_message("msg-1", "recorded before resume"), + ev_completed("resp-initial"), + ]), + ) + .await; + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "seed resume history".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + let initial_request = initial_mock.single_request(); + + let resumed_mock = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-resume"), + ev_assistant_message("msg-2", "first resumed turn"), + ev_completed("resp-resume"), + ]), + ) + .await; + + let mut resume_builder = test_codex().with_config(|config| { + config.model = Some("gpt-5.2-codex".to_string()); + config.features.enable(Feature::Personality); + config.personality = Some(Personality::Pragmatic); + }); + let resumed = resume_builder.resume(&server, home, rollout_path).await?; + resumed + .codex + .submit(Op::UserTurn { + items: vec![UserInput::Text { + text: "resume and change personality".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + cwd: resumed.cwd_path().to_path_buf(), + approval_policy: AskForApproval::Never, + sandbox_policy: SandboxPolicy::new_read_only_policy(), + model: resumed.session_configured.model.clone(), + effort: resumed.config.model_reasoning_effort, + summary: ReasoningSummary::Auto, + collaboration_mode: None, + personality: Some(Personality::Friendly), + }) + .await?; + wait_for_event(&resumed.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + let resumed_request = resumed_mock.single_request(); + insta::assert_snapshot!( + "model_visible_layout_resume_with_personality_change", + format_labeled_requests_snapshot( + "First post-resume turn where resumed config model differs from rollout and personality changes.", + &[ + ("Last Request 
Before Resume", &initial_request), + ("First Request After Resume", &resumed_request), + ] + ) + ); + + Ok(()) +} + +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn snapshot_model_visible_layout_resume_override_matches_rollout_model() -> Result<()> { + skip_if_no_network!(Ok(())); + + let server = start_mock_server().await; + let mut initial_builder = test_codex().with_config(|config| { + config.model = Some("gpt-5.2".to_string()); + }); + let initial = initial_builder.build(&server).await?; + let codex = Arc::clone(&initial.codex); + let home = initial.home.clone(); + let rollout_path = initial + .session_configured + .rollout_path + .clone() + .expect("rollout path"); + + let initial_mock = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-initial"), + ev_assistant_message("msg-1", "recorded before resume"), + ev_completed("resp-initial"), + ]), + ) + .await; + codex + .submit(Op::UserInput { + items: vec![UserInput::Text { + text: "seed resume history".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&codex, |event| matches!(event, EventMsg::TurnComplete(_))).await; + let initial_request = initial_mock.single_request(); + + let resumed_mock = mount_sse_once( + &server, + sse(vec![ + ev_response_created("resp-resume"), + ev_assistant_message("msg-2", "first resumed turn"), + ev_completed("resp-resume"), + ]), + ) + .await; + + let mut resume_builder = test_codex().with_config(|config| { + config.model = Some("gpt-5.2-codex".to_string()); + }); + let resumed = resume_builder.resume(&server, home, rollout_path).await?; + resumed + .codex + .submit(Op::OverrideTurnContext { + cwd: None, + approval_policy: None, + sandbox_policy: None, + windows_sandbox_level: None, + model: Some("gpt-5.2".to_string()), + effort: None, + summary: None, + collaboration_mode: None, + personality: None, + }) + .await?; + resumed + .codex + .submit(Op::UserInput { + items: 
vec![UserInput::Text { + text: "first resumed turn after model override".into(), + text_elements: Vec::new(), + }], + final_output_json_schema: None, + }) + .await?; + wait_for_event(&resumed.codex, |event| { + matches!(event, EventMsg::TurnComplete(_)) + }) + .await; + + let resumed_request = resumed_mock.single_request(); + insta::assert_snapshot!( + "model_visible_layout_resume_override_matches_rollout_model", + format_labeled_requests_snapshot( + "First post-resume turn where pre-turn override sets model to rollout model; no model-switch update should appear.", + &[ + ("Last Request Before Resume", &initial_request), + ("First Request After Resume + Override", &resumed_request), + ] + ) + ); + + Ok(()) +} diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_cwd_change_does_not_refresh_agents.snap b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_cwd_change_does_not_refresh_agents.snap new file mode 100644 index 000000000..9b6291809 --- /dev/null +++ b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_cwd_change_does_not_refresh_agents.snap @@ -0,0 +1,24 @@ +--- +source: core/tests/suite/model_visible_layout.rs +expression: "format_labeled_requests_snapshot(\"Second turn changes cwd to a directory with different AGENTS.md; current behavior does not emit refreshed AGENTS instructions.\",\n&[(\"First Request (agents_one)\", &requests[0]),\n(\"Second Request (agents_two cwd)\", &requests[1]),])" +--- +Scenario: Second turn changes cwd to a directory with different AGENTS.md; current behavior does not emit refreshed AGENTS instructions. 
+ +## First Request (agents_one) +00:message/developer: +01:message/user: +02:message/user:> +03:message/user:> +04:message/developer: +05:message/user:first turn in agents_one + +## Second Request (agents_two cwd) +00:message/developer: +01:message/user: +02:message/user:> +03:message/user:> +04:message/developer: +05:message/user:first turn in agents_one +06:message/assistant:turn one complete +07:message/user:> +08:message/user:second turn in agents_two diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_override_matches_rollout_model.snap b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_override_matches_rollout_model.snap new file mode 100644 index 000000000..850b6be0a --- /dev/null +++ b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_override_matches_rollout_model.snap @@ -0,0 +1,22 @@ +--- +source: core/tests/suite/model_visible_layout.rs +expression: "format_labeled_requests_snapshot(\"First post-resume turn where pre-turn override sets model to rollout model; no model-switch update should appear.\",\n&[(\"Last Request Before Resume\", &initial_request),\n(\"First Request After Resume + Override\", &resumed_request),])" +--- +Scenario: First post-resume turn where pre-turn override sets model to rollout model; no model-switch update should appear. 
+ +## Last Request Before Resume +00:message/developer: +01:message/user: +02:message/user:> +03:message/user:seed resume history + +## First Request After Resume + Override +00:message/developer: +01:message/user: +02:message/user:> +03:message/user:seed resume history +04:message/assistant:recorded before resume +05:message/developer: +06:message/user: +07:message/user:> +08:message/user:first resumed turn after model override diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_with_personality_change.snap b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_with_personality_change.snap new file mode 100644 index 000000000..a37cd885e --- /dev/null +++ b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_resume_with_personality_change.snap @@ -0,0 +1,26 @@ +--- +source: core/tests/suite/model_visible_layout.rs +expression: "format_labeled_requests_snapshot(\"First post-resume turn where resumed config model differs from rollout and personality changes.\",\n&[(\"Last Request Before Resume\", &initial_request),\n(\"First Request After Resume\", &resumed_request),])" +--- +Scenario: First post-resume turn where resumed config model differs from rollout and personality changes. + +## Last Request Before Resume +00:message/developer: +01:message/user: +02:message/user:> +03:message/user:seed resume history + +## First Request After Resume +00:message/developer: +01:message/user: +02:message/user:> +03:message/user:seed resume history +04:message/assistant:recorded before resume +05:message/developer: +06:message/developer: The user has requested a new communication style. Future messages should adhe... +07:message/user: +08:message/user:> +09:message/developer: +10:message/developer:\nThe user was previously using a different model. Please continue the conversatio... 
+11:message/developer: The user has requested a new communication style. Future messages should adhe... +12:message/user:resume and change personality diff --git a/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_turn_overrides.snap b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_turn_overrides.snap new file mode 100644 index 000000000..9ec0b2e7d --- /dev/null +++ b/codex-rs/core/tests/suite/snapshots/all__suite__model_visible_layout__model_visible_layout_turn_overrides.snap @@ -0,0 +1,24 @@ +--- +source: core/tests/suite/model_visible_layout.rs +expression: "format_labeled_requests_snapshot(\"Second turn changes cwd, approval policy, and personality while keeping model constant.\",\n&[(\"First Request (Baseline)\", &requests[0]),\n(\"Second Request (Turn Overrides)\", &requests[1]),])" +--- +Scenario: Second turn changes cwd, approval policy, and personality while keeping model constant. + +## First Request (Baseline) +00:message/developer: +01:message/user: +02:message/user:> +03:message/developer: +04:message/user:first turn + +## Second Request (Turn Overrides) +00:message/developer: +01:message/user: +02:message/user:> +03:message/developer: +04:message/user:first turn +05:message/assistant:turn one complete +06:message/user: +07:message/developer: +08:message/developer: The user has requested a new communication style. Future messages should adhe... +09:message/user:second turn with context updates