chore(core) Consolidate apply_patch tests (#6545)

## Summary Consolidates our apply_patch tests into one suite, and ensures each test case tests the various ways the harness supports apply_patch: 1. Freeform custom tool call 2. JSON function tool 3. Simple shell call 4. Heredoc shell call There are a few test cases that are specific to a particular variant, I've left those alone. ## Testing - [x] This adds a significant number of tests
2025-11-13 15:52:39 -08:00 · 2025-11-13 15:52:39 -08:00 · 2c1b693da4
commit 2c1b693da4
parent 547be54ee8
7 changed files with 289 additions and 1079 deletions
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@ -1115,6 +1115,7 @@ dependencies = [
 "similar",
 "strum_macros 0.27.2",
 "tempfile",
+ "test-case",
 "test-log",
 "thiserror 2.0.17",
 "time",
@ -6161,6 +6162,39 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"

+[[package]]
+name = "test-case"
+version = "3.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb2550dd13afcd286853192af8601920d959b14c401fcece38071d53bf0768a8"
+dependencies = [
+ "test-case-macros",
+]
+
+[[package]]
+name = "test-case-core"
+version = "3.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adcb7fd841cd518e279be3d5a3eb0636409487998a4aff22f3de87b81e88384f"
+dependencies = [
+ "cfg-if",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "test-case-macros"
+version = "3.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+ "test-case-core",
+]
+
 [[package]]
 name = "test-log"
 version = "0.2.18"
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@ -60,6 +60,7 @@ shlex = { workspace = true }
 similar = { workspace = true }
 strum_macros = { workspace = true }
 tempfile = { workspace = true }
+test-case = "3.3.1"
 test-log = { workspace = true }
 thiserror = { workspace = true }
 time = { workspace = true, features = [
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@ -12,6 +12,8 @@ use wiremock::ResponseTemplate;
 use wiremock::matchers::method;
 use wiremock::matchers::path_regex;

+use crate::test_codex::ApplyPatchModelOutput;
+
 #[derive(Debug, Clone)]
 pub struct ResponseMock {
    requests: Arc<Mutex<Vec<ResponsesRequest>>>,
@ -367,6 +369,21 @@ pub fn ev_local_shell_call(call_id: &str, status: &str, command: Vec<&str>) -> V
    })
 }

+pub fn ev_apply_patch_call(
+    call_id: &str,
+    patch: &str,
+    output_type: ApplyPatchModelOutput,
+) -> Value {
+    match output_type {
+        ApplyPatchModelOutput::Freeform => ev_apply_patch_custom_tool_call(call_id, patch),
+        ApplyPatchModelOutput::Function => ev_apply_patch_function_call(call_id, patch),
+        ApplyPatchModelOutput::Shell => ev_apply_patch_shell_call(call_id, patch),
+        ApplyPatchModelOutput::ShellViaHeredoc => {
+            ev_apply_patch_shell_call_via_heredoc(call_id, patch)
+        }
+    }
+}
+
 /// Convenience: SSE event for an `apply_patch` custom tool call with raw patch
 /// text. This mirrors the payload produced by the Responses API when the model
 /// invokes `apply_patch` directly (before we convert it to a function call).
@ -400,6 +417,21 @@ pub fn ev_apply_patch_function_call(call_id: &str, patch: &str) -> Value {
    })
 }

+pub fn ev_apply_patch_shell_call(call_id: &str, patch: &str) -> Value {
+    let args = serde_json::json!({ "command": ["apply_patch", patch] });
+    let arguments = serde_json::to_string(&args).expect("serialize apply_patch arguments");
+
+    ev_function_call(call_id, "shell", &arguments)
+}
+
+pub fn ev_apply_patch_shell_call_via_heredoc(call_id: &str, patch: &str) -> Value {
+    let script = format!("apply_patch <<'EOF'\n{patch}\nEOF\n");
+    let args = serde_json::json!({ "command": ["bash", "-lc", script] });
+    let arguments = serde_json::to_string(&args).expect("serialize apply_patch arguments");
+
+    ev_function_call(call_id, "shell", &arguments)
+}
+
 pub fn sse_failed(id: &str, code: &str, message: &str) -> String {
    sse(vec![serde_json::json!({
        "type": "response.failed",
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@ -29,6 +29,15 @@ use crate::wait_for_event;

 type ConfigMutator = dyn FnOnce(&mut Config) + Send;

+/// A collection of different ways the model can output an apply_patch call
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum ApplyPatchModelOutput {
+    Freeform,
+    Function,
+    Shell,
+    ShellViaHeredoc,
+}
+
 pub struct TestCodexBuilder {
    config_mutators: Vec<Box<ConfigMutator>>,
 }
@ -265,6 +274,19 @@ impl TestCodexHarness {
            .expect("output string")
            .to_string()
    }
+
+    pub async fn apply_patch_output(
+        &self,
+        call_id: &str,
+        output_type: ApplyPatchModelOutput,
+    ) -> String {
+        match output_type {
+            ApplyPatchModelOutput::Freeform => self.custom_tool_call_output(call_id).await,
+            ApplyPatchModelOutput::Function
+            | ApplyPatchModelOutput::Shell
+            | ApplyPatchModelOutput::ShellViaHeredoc => self.function_call_stdout(call_id).await,
+        }
+    }
 }

 fn custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {
--- a/codex-rs/core/tests/suite/apply_patch_cli.rs
+++ b/codex-rs/core/tests/suite/apply_patch_cli.rs
@ -1,6 +1,8 @@
 #![allow(clippy::expect_used)]

 use anyhow::Result;
+use core_test_support::responses::ev_apply_patch_call;
+use core_test_support::test_codex::ApplyPatchModelOutput;
 use pretty_assertions::assert_eq;
 use std::fs;

@ -25,6 +27,7 @@ use core_test_support::skip_if_no_network;
 use core_test_support::test_codex::TestCodexHarness;
 use core_test_support::wait_for_event;
 use serde_json::json;
+use test_case::test_case;

 async fn apply_patch_harness() -> Result<TestCodexHarness> {
    apply_patch_harness_with(|_| {}).await
@ -45,19 +48,25 @@ async fn mount_apply_patch(
    call_id: &str,
    patch: &str,
    assistant_msg: &str,
+    output_type: ApplyPatchModelOutput,
 ) {
    mount_sse_sequence(
        harness.server(),
-        apply_patch_responses(call_id, patch, assistant_msg),
+        apply_patch_responses(call_id, patch, assistant_msg, output_type),
    )
    .await;
 }

-fn apply_patch_responses(call_id: &str, patch: &str, assistant_msg: &str) -> Vec<String> {
+fn apply_patch_responses(
+    call_id: &str,
+    patch: &str,
+    assistant_msg: &str,
+    output_type: ApplyPatchModelOutput,
+) -> Vec<String> {
    vec![
        sse(vec![
            ev_response_created("resp-1"),
-            ev_apply_patch_function_call(call_id, patch),
+            ev_apply_patch_call(call_id, patch, output_type),
            ev_completed("resp-1"),
        ]),
        sse(vec![
@ -68,7 +77,13 @@ fn apply_patch_responses(call_id: &str, patch: &str, assistant_msg: &str) -> Vec
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_multiple_operations_integration() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_multiple_operations_integration(
+    output_type: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness_with(|config| {
@ -86,11 +101,11 @@ async fn apply_patch_cli_multiple_operations_integration() -> Result<()> {
    let patch = "*** Begin Patch\n*** Add File: nested/new.txt\n+created\n*** Delete File: delete.txt\n*** Update File: modify.txt\n@@\n-line2\n+changed\n*** End Patch";

    let call_id = "apply-multi-ops";
-    mount_apply_patch(&harness, call_id, patch, "done").await;
+    mount_apply_patch(&harness, call_id, patch, "done", output_type).await;

    harness.submit("please apply multi-ops patch").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, output_type).await;

    let expected = r"(?s)^Exit code: 0
 Wall time: [0-9]+(?:\.[0-9]+)? seconds
@ -113,7 +128,11 @@ D delete.txt
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_multiple_chunks() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_multiple_chunks(model_output: ApplyPatchModelOutput) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -123,7 +142,7 @@ async fn apply_patch_cli_multiple_chunks() -> Result<()> {

    let patch = "*** Begin Patch\n*** Update File: multi.txt\n@@\n-line2\n+changed2\n@@\n-line4\n+changed4\n*** End Patch";
    let call_id = "apply-multi-chunks";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply multi-chunk patch").await?;

@ -135,7 +154,13 @@ async fn apply_patch_cli_multiple_chunks() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_moves_file_to_new_directory() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_moves_file_to_new_directory(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -147,7 +172,7 @@ async fn apply_patch_cli_moves_file_to_new_directory() -> Result<()> {

    let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-old content\n+new content\n*** End Patch";
    let call_id = "apply-move";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply move patch").await?;

@ -157,7 +182,13 @@ async fn apply_patch_cli_moves_file_to_new_directory() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_updates_file_appends_trailing_newline() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_updates_file_appends_trailing_newline(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -167,7 +198,7 @@ async fn apply_patch_cli_updates_file_appends_trailing_newline() -> Result<()> {

    let patch = "*** Begin Patch\n*** Update File: no_newline.txt\n@@\n-no newline at end\n+first line\n+second line\n*** End Patch";
    let call_id = "apply-append-nl";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply newline patch").await?;

@ -178,7 +209,13 @@ async fn apply_patch_cli_updates_file_appends_trailing_newline() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_insert_only_hunk_modifies_file() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_insert_only_hunk_modifies_file(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -188,7 +225,7 @@ async fn apply_patch_cli_insert_only_hunk_modifies_file() -> Result<()> {

    let patch = "*** Begin Patch\n*** Update File: insert_only.txt\n@@\n alpha\n+beta\n omega\n*** End Patch";
    let call_id = "apply-insert-only";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("insert lines via apply_patch").await?;

@ -197,7 +234,13 @@ async fn apply_patch_cli_insert_only_hunk_modifies_file() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_move_overwrites_existing_destination() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_move_overwrites_existing_destination(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -211,7 +254,7 @@ async fn apply_patch_cli_move_overwrites_existing_destination() -> Result<()> {

    let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/dir/name.txt\n@@\n-from\n+new\n*** End Patch";
    let call_id = "apply-move-overwrite";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply move overwrite patch").await?;

@ -221,7 +264,13 @@ async fn apply_patch_cli_move_overwrites_existing_destination() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_move_without_content_change_has_no_turn_diff() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_move_without_content_change_has_no_turn_diff(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -236,7 +285,7 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff() -> Resul

    let patch = "*** Begin Patch\n*** Update File: old/name.txt\n*** Move to: renamed/name.txt\n@@\n same\n*** End Patch";
    let call_id = "apply-move-no-change";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    let model = test.session_configured.model.clone();
    codex
@ -272,7 +321,13 @@ async fn apply_patch_cli_move_without_content_change_has_no_turn_diff() -> Resul
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_add_overwrites_existing_file() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_add_overwrites_existing_file(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -282,7 +337,7 @@ async fn apply_patch_cli_add_overwrites_existing_file() -> Result<()> {

    let patch = "*** Begin Patch\n*** Add File: duplicate.txt\n+new content\n*** End Patch";
    let call_id = "apply-add-overwrite";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply add overwrite patch").await?;

@ -291,18 +346,24 @@ async fn apply_patch_cli_add_overwrites_existing_file() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_rejects_invalid_hunk_header() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_rejects_invalid_hunk_header(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;

    let patch = "*** Begin Patch\n*** Frobnicate File: foo\n*** End Patch";
    let call_id = "apply-invalid-header";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply invalid header patch").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;

    assert!(
        out.contains("apply_patch verification failed"),
@ -316,7 +377,13 @@ async fn apply_patch_cli_rejects_invalid_hunk_header() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_reports_missing_context() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_reports_missing_context(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -327,11 +394,11 @@ async fn apply_patch_cli_reports_missing_context() -> Result<()> {
    let patch =
        "*** Begin Patch\n*** Update File: modify.txt\n@@\n-missing\n+changed\n*** End Patch";
    let call_id = "apply-missing-context";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply missing context patch").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;

    assert!(
        out.contains("apply_patch verification failed"),
@ -343,18 +410,24 @@ async fn apply_patch_cli_reports_missing_context() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_reports_missing_target_file() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_reports_missing_target_file(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;

    let patch = "*** Begin Patch\n*** Update File: missing.txt\n@@\n-nope\n+better\n*** End Patch";
    let call_id = "apply-missing-file";
-    mount_apply_patch(&harness, call_id, patch, "fail").await;
+    mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;

    harness.submit("attempt to update a missing file").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;
    assert!(
        out.contains("apply_patch verification failed"),
        "expected verification failure message"
@ -372,18 +445,24 @@ async fn apply_patch_cli_reports_missing_target_file() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_delete_missing_file_reports_error() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_delete_missing_file_reports_error(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;

    let patch = "*** Begin Patch\n*** Delete File: missing.txt\n*** End Patch";
    let call_id = "apply-delete-missing";
-    mount_apply_patch(&harness, call_id, patch, "fail").await;
+    mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;

    harness.submit("attempt to delete missing file").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;

    assert!(
        out.contains("apply_patch verification failed"),
@ -402,18 +481,22 @@ async fn apply_patch_cli_delete_missing_file_reports_error() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_rejects_empty_patch() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_rejects_empty_patch(model_output: ApplyPatchModelOutput) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;

    let patch = "*** Begin Patch\n*** End Patch";
    let call_id = "apply-empty";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply empty patch").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;
    assert!(
        out.contains("patch rejected: empty patch"),
        "expected rejection for empty patch: {out}"
@ -422,7 +505,13 @@ async fn apply_patch_cli_rejects_empty_patch() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_delete_directory_reports_verification_error() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_delete_directory_reports_verification_error(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -431,18 +520,24 @@ async fn apply_patch_cli_delete_directory_reports_verification_error() -> Result

    let patch = "*** Begin Patch\n*** Delete File: dir\n*** End Patch";
    let call_id = "apply-delete-dir";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("delete a directory via apply_patch").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;
    assert!(out.contains("apply_patch verification failed"));
    assert!(out.contains("Failed to read"));
    Ok(())
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_rejects_path_traversal_outside_workspace(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -458,7 +553,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()

    let patch = "*** Begin Patch\n*** Add File: ../escape.txt\n+outside\n*** End Patch";
    let call_id = "apply-path-traversal";
-    mount_apply_patch(&harness, call_id, patch, "fail").await;
+    mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;

    let sandbox_policy = SandboxPolicy::WorkspaceWrite {
        writable_roots: vec![],
@ -473,7 +568,7 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()
        )
        .await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;
    assert!(
        out.contains(
            "patch rejected: writing outside of the project; rejected by user approval settings"
@ -488,7 +583,13 @@ async fn apply_patch_cli_rejects_path_traversal_outside_workspace() -> Result<()
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -507,7 +608,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace() -> Resu

    let patch = "*** Begin Patch\n*** Update File: stay.txt\n*** Move to: ../escape-move.txt\n@@\n-from\n+to\n*** End Patch";
    let call_id = "apply-move-traversal";
-    mount_apply_patch(&harness, call_id, patch, "fail").await;
+    mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;

    let sandbox_policy = SandboxPolicy::WorkspaceWrite {
        writable_roots: vec![],
@ -519,7 +620,7 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace() -> Resu
        .submit_with_policy("attempt move traversal via apply_patch", sandbox_policy)
        .await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;
    assert!(
        out.contains(
            "patch rejected: writing outside of the project; rejected by user approval settings"
@ -535,7 +636,13 @@ async fn apply_patch_cli_rejects_move_path_traversal_outside_workspace() -> Resu
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_verification_failure_has_no_side_effects() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_verification_failure_has_no_side_effects(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness_with(|config| {
@ -547,7 +654,7 @@ async fn apply_patch_cli_verification_failure_has_no_side_effects() -> Result<()
    let call_id = "apply-partial-no-side-effects";
    let patch = "*** Begin Patch\n*** Add File: created.txt\n+hello\n*** Update File: missing.txt\n@@\n-old\n+new\n*** End Patch";

-    mount_apply_patch(&harness, call_id, patch, "failed").await;
+    mount_apply_patch(&harness, call_id, patch, "failed", model_output).await;

    harness.submit("attempt partial apply patch").await?;

@ -700,7 +807,14 @@ async fn apply_patch_function_accepts_lenient_heredoc_wrapped_patch() -> Result<
        format!("*** Begin Patch\n*** Add File: {file_name}\n+lenient\n*** End Patch\n");
    let wrapped = format!("<<'EOF'\n{patch_inner}EOF\n");
    let call_id = "apply-lenient";
-    mount_apply_patch(&harness, call_id, &wrapped, "ok").await;
+    mount_apply_patch(
+        &harness,
+        call_id,
+        wrapped.as_str(),
+        "ok",
+        ApplyPatchModelOutput::Function,
+    )
+    .await;

    harness.submit("apply lenient heredoc patch").await?;

@ -710,7 +824,11 @@ async fn apply_patch_function_accepts_lenient_heredoc_wrapped_patch() -> Result<
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_end_of_file_anchor() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_end_of_file_anchor(model_output: ApplyPatchModelOutput) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -720,7 +838,7 @@ async fn apply_patch_cli_end_of_file_anchor() -> Result<()> {

    let patch = "*** Begin Patch\n*** Update File: tail.txt\n@@\n-last\n+end\n*** End of File\n*** End Patch";
    let call_id = "apply-eof";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply EOF-anchored patch").await?;
    assert_eq!(fs::read_to_string(&target)?, "alpha\nend\n");
@ -728,7 +846,13 @@ async fn apply_patch_cli_end_of_file_anchor() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_cli_missing_second_chunk_context_rejected() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_cli_missing_second_chunk_context_rejected(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -740,11 +864,11 @@ async fn apply_patch_cli_missing_second_chunk_context_rejected() -> Result<()> {
    let patch =
        "*** Begin Patch\n*** Update File: two_chunks.txt\n@@\n-b\n+B\n\n-d\n+D\n*** End Patch";
    let call_id = "apply-missing-ctx-2nd";
-    mount_apply_patch(&harness, call_id, patch, "fail").await;
+    mount_apply_patch(&harness, call_id, patch, "fail", model_output).await;

    harness.submit("apply missing context second chunk").await?;

-    let out = harness.function_call_stdout(call_id).await;
+    let out = harness.apply_patch_output(call_id, model_output).await;
    assert!(out.contains("apply_patch verification failed"));
    assert!(
        out.contains("Failed to find expected lines in"),
@ -756,7 +880,13 @@ async fn apply_patch_cli_missing_second_chunk_context_rejected() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_emits_turn_diff_event_with_unified_diff() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_emits_turn_diff_event_with_unified_diff(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -767,16 +897,7 @@ async fn apply_patch_emits_turn_diff_event_with_unified_diff() -> Result<()> {
    let call_id = "apply-diff-event";
    let file = "udiff.txt";
    let patch = format!("*** Begin Patch\n*** Add File: {file}\n+hello\n*** End Patch\n");
-    let first = sse(vec![
-        ev_response_created("resp-1"),
-        ev_apply_patch_function_call(call_id, &patch),
-        ev_completed("resp-1"),
-    ]);
-    let second = sse(vec![
-        ev_assistant_message("msg-1", "ok"),
-        ev_completed("resp-2"),
-    ]);
-    mount_sse_sequence(harness.server(), vec![first, second]).await;
+    mount_apply_patch(&harness, call_id, patch.as_str(), "ok", model_output).await;

    let model = test.session_configured.model.clone();
    codex
@ -814,7 +935,13 @@ async fn apply_patch_emits_turn_diff_event_with_unified_diff() -> Result<()> {
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_turn_diff_for_rename_with_content_change() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_turn_diff_for_rename_with_content_change(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -829,16 +956,7 @@ async fn apply_patch_turn_diff_for_rename_with_content_change() -> Result<()> {
    // Patch: update + move
    let call_id = "apply-rename-change";
    let patch = "*** Begin Patch\n*** Update File: old.txt\n*** Move to: new.txt\n@@\n-old\n+new\n*** End Patch";
-    let first = sse(vec![
-        ev_response_created("resp-1"),
-        ev_apply_patch_function_call(call_id, patch),
-        ev_completed("resp-1"),
-    ]);
-    let second = sse(vec![
-        ev_assistant_message("msg-1", "ok"),
-        ev_completed("resp-2"),
-    ]);
-    mount_sse_sequence(harness.server(), vec![first, second]).await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    let model = test.session_configured.model.clone();
    codex
@ -1031,7 +1149,13 @@ async fn apply_patch_aggregates_diff_preserves_success_after_failure() -> Result
 }

 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
-async fn apply_patch_change_context_disambiguates_target() -> Result<()> {
+#[test_case(ApplyPatchModelOutput::Freeform)]
+#[test_case(ApplyPatchModelOutput::Function)]
+#[test_case(ApplyPatchModelOutput::Shell)]
+#[test_case(ApplyPatchModelOutput::ShellViaHeredoc)]
+async fn apply_patch_change_context_disambiguates_target(
+    model_output: ApplyPatchModelOutput,
+) -> Result<()> {
    skip_if_no_network!(Ok(()));

    let harness = apply_patch_harness().await?;
@ -1042,7 +1166,7 @@ async fn apply_patch_change_context_disambiguates_target() -> Result<()> {
    let patch =
        "*** Begin Patch\n*** Update File: multi_ctx.txt\n@@ fn b\n-x=10\n+x=11\n*** End Patch";
    let call_id = "apply-ctx";
-    mount_apply_patch(&harness, call_id, patch, "ok").await;
+    mount_apply_patch(&harness, call_id, patch, "ok", model_output).await;

    harness.submit("apply with change_context").await?;

--- a/codex-rs/core/tests/suite/apply_patch_freeform.rs
+++ b/codex-rs/core/tests/suite/apply_patch_freeform.rs
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@ -18,8 +18,6 @@ mod abort_tasks;
 #[cfg(not(target_os = "windows"))]
 mod apply_patch_cli;
 #[cfg(not(target_os = "windows"))]
-mod apply_patch_freeform;
-#[cfg(not(target_os = "windows"))]
 mod approvals;
 mod auth_refresh;
 mod cli_stream;