From 7e980d7db665ebfe365c6aeb546405d59d91ff32 Mon Sep 17 00:00:00 2001
From: Curtis 'Fjord' Hawthorne <fjord@openai.com>
Date: Thu, 26 Feb 2026 18:17:46 -0800
Subject: [PATCH] Support multimodal custom tool outputs (#12948)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

This changes `custom_tool_call_output` to use the same output payload
shape as `function_call_output`, so freeform tools can return either
plain text or structured content items.

The main goal is to let `js_repl` return image content from nested
`view_image` calls in its own `custom_tool_call_output`, instead of
relying on a separate injected message.

## What changed

- Changed `custom_tool_call_output.output` from `string` to
`FunctionCallOutputPayload`
- Updated freeform tool plumbing to preserve structured output bodies
- Updated `js_repl` to aggregate nested tool content items and attach
them to the outer `js_repl` result
- Removed the old `js_repl` special case that injected `view_image`
results as a separate pending user image message
- Updated normalization/history/truncation paths to handle multimodal
`custom_tool_call_output`
- Regenerated app-server protocol schema artifacts

## Behavior

Direct `view_image` calls still return a `function_call_output` with
image content.

When `view_image` is called inside `js_repl`, the outer `js_repl`
`custom_tool_call_output` now carries, in this order:
- an `input_text` item first, if the JS produced text output
- one or more `input_image` items from the nested tool results

So the nested image result now stays inside the `js_repl` tool output
instead of being injected as a separate message.

## Compatibility

This is intended to be backward-compatible for resumed conversations.

Older histories that stored `custom_tool_call_output.output` as a plain
string still deserialize correctly, and older histories that used the
previous injected-image-message flow also continue to resume.

Added regression coverage for resuming a pre-change rollout containing:
- string-valued `custom_tool_call_output`
- legacy injected image message history


#### [git stack](https://github.com/magus/git-stack-cli)
- 👉 `1` https://github.com/openai/codex/pull/12948
---
 .../schema/json/ClientRequest.json            |   2 +-
 .../schema/json/EventMsg.json                 |   2 +-
 .../codex_app_server_protocol.schemas.json    |   2 +-
 .../RawResponseItemCompletedNotification.json |   2 +-
 .../schema/json/v2/ThreadResumeParams.json    |   2 +-
 .../schema/typescript/ResponseItem.ts         |   2 +-
 codex-rs/core/src/client_common.rs            |  69 ++++-
 codex-rs/core/src/context_manager/history.rs  |  49 ++--
 .../core/src/context_manager/history_tests.rs |  76 +++++-
 .../core/src/context_manager/normalize.rs     |  16 +-
 codex-rs/core/src/tools/context.rs            |  28 +-
 codex-rs/core/src/tools/handlers/js_repl.rs   |  16 +-
 codex-rs/core/src/tools/js_repl/mod.rs        | 241 ++++++++++++------
 codex-rs/core/src/tools/parallel.rs           |   5 +-
 codex-rs/core/src/tools/router.rs             |   5 +-
 codex-rs/core/tests/common/responses.rs       |  83 +++++-
 codex-rs/core/tests/common/test_codex.rs      |  48 +++-
 codex-rs/core/tests/suite/client.rs           | 144 ++++++++++-
 codex-rs/core/tests/suite/view_image.rs       |  46 ++--
 codex-rs/protocol/src/models.rs               |  27 +-
 20 files changed, 688 insertions(+), 177 deletions(-)

diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json
index 5ab197c84..03e42bed4 100644
--- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json
+++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json
@@ -1340,7 +1340,7 @@
               "type": "string"
             },
             "output": {
-              "type": "string"
+              "$ref": "#/definitions/FunctionCallOutputPayload"
             },
             "type": {
               "enum": [
diff --git a/codex-rs/app-server-protocol/schema/json/EventMsg.json b/codex-rs/app-server-protocol/schema/json/EventMsg.json
index c7bf90874..9f442ece9 100644
--- a/codex-rs/app-server-protocol/schema/json/EventMsg.json
+++ b/codex-rs/app-server-protocol/schema/json/EventMsg.json
@@ -4822,7 +4822,7 @@
               "type": "string"
             },
             "output": {
-              "type": "string"
+              "$ref": "#/definitions/FunctionCallOutputPayload"
             },
             "type": {
               "enum": [
diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
index 28a802bb3..1391a97ec 100644
--- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
+++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json
@@ -11421,7 +11421,7 @@
                 "type": "string"
               },
               "output": {
-                "type": "string"
+                "$ref": "#/definitions/v2/FunctionCallOutputPayload"
               },
               "type": {
                 "enum": [
diff --git a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json
index 748eeaab4..4717ff266 100644
--- a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json
+++ b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json
@@ -565,7 +565,7 @@
               "type": "string"
             },
             "output": {
-              "type": "string"
+              "$ref": "#/definitions/FunctionCallOutputPayload"
             },
             "type": {
               "enum": [
diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json
index 29d6fbc6d..ef7607d3c 100644
--- a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json
+++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json
@@ -615,7 +615,7 @@
               "type": "string"
             },
             "output": {
-              "type": "string"
+              "$ref": "#/definitions/FunctionCallOutputPayload"
             },
             "type": {
               "enum": [
diff --git a/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts b/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts
index 611c7fb22..dd7621f01 100644
--- a/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts
+++ b/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts
@@ -15,4 +15,4 @@ export type ResponseItem = { "type": "message", role: string, content: Array<Con
 /**
  * Set when using the Responses API.
  */
-call_id: string | null, status: LocalShellStatus, action: LocalShellAction, } | { "type": "function_call", name: string, arguments: string, call_id: string, } | { "type": "function_call_output", call_id: string, output: FunctionCallOutputPayload, } | { "type": "custom_tool_call", status?: string, call_id: string, name: string, input: string, } | { "type": "custom_tool_call_output", call_id: string, output: string, } | { "type": "web_search_call", status?: string, action?: WebSearchAction, } | { "type": "ghost_snapshot", ghost_commit: GhostCommit, } | { "type": "compaction", encrypted_content: string, } | { "type": "other" };
+call_id: string | null, status: LocalShellStatus, action: LocalShellAction, } | { "type": "function_call", name: string, arguments: string, call_id: string, } | { "type": "function_call_output", call_id: string, output: FunctionCallOutputPayload, } | { "type": "custom_tool_call", status?: string, call_id: string, name: string, input: string, } | { "type": "custom_tool_call_output", call_id: string, output: FunctionCallOutputPayload, } | { "type": "web_search_call", status?: string, action?: WebSearchAction, } | { "type": "ghost_snapshot", ghost_commit: GhostCommit, } | { "type": "compaction", encrypted_content: string, } | { "type": "other" };
diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs
index 33fc43872..3fea6eed5 100644
--- a/codex-rs/core/src/client_common.rs
+++ b/codex-rs/core/src/client_common.rs
@@ -84,19 +84,13 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) {
                 shell_call_ids.insert(call_id.clone());
             }
         }
-        ResponseItem::CustomToolCallOutput { call_id, output } => {
-            if shell_call_ids.remove(call_id)
-                && let Some(structured) = parse_structured_shell_output(output)
-            {
-                *output = structured
-            }
-        }
         ResponseItem::FunctionCall { name, call_id, .. }
             if is_shell_tool_name(name) || name == "apply_patch" =>
         {
             shell_call_ids.insert(call_id.clone());
         }
-        ResponseItem::FunctionCallOutput { call_id, output } => {
+        ResponseItem::FunctionCallOutput { call_id, output }
+        | ResponseItem::CustomToolCallOutput { call_id, output } => {
             if shell_call_ids.remove(call_id)
                 && let Some(structured) = output
                     .text_content()
@@ -240,6 +234,7 @@ mod tests {
     use codex_api::common::OpenAiVerbosity;
     use codex_api::common::TextControls;
     use codex_api::create_text_param_for_request;
+    use codex_protocol::models::FunctionCallOutputPayload;
     use pretty_assertions::assert_eq;
 
     use super::*;
@@ -343,4 +338,62 @@ mod tests {
         let v = serde_json::to_value(&req).expect("json");
         assert!(v.get("text").is_none());
     }
+
+    #[test]
+    fn reserializes_shell_outputs_for_function_and_custom_tool_calls() {
+        let raw_output = r#"{"output":"hello","metadata":{"exit_code":0,"duration_seconds":0.5}}"#;
+        let expected_output = "Exit code: 0\nWall time: 0.5 seconds\nOutput:\nhello";
+        let mut items = vec![
+            ResponseItem::FunctionCall {
+                id: None,
+                name: "shell".to_string(),
+                arguments: "{}".to_string(),
+                call_id: "call-1".to_string(),
+            },
+            ResponseItem::FunctionCallOutput {
+                call_id: "call-1".to_string(),
+                output: FunctionCallOutputPayload::from_text(raw_output.to_string()),
+            },
+            ResponseItem::CustomToolCall {
+                id: None,
+                status: None,
+                call_id: "call-2".to_string(),
+                name: "apply_patch".to_string(),
+                input: "*** Begin Patch".to_string(),
+            },
+            ResponseItem::CustomToolCallOutput {
+                call_id: "call-2".to_string(),
+                output: FunctionCallOutputPayload::from_text(raw_output.to_string()),
+            },
+        ];
+
+        reserialize_shell_outputs(&mut items);
+
+        assert_eq!(
+            items,
+            vec![
+                ResponseItem::FunctionCall {
+                    id: None,
+                    name: "shell".to_string(),
+                    arguments: "{}".to_string(),
+                    call_id: "call-1".to_string(),
+                },
+                ResponseItem::FunctionCallOutput {
+                    call_id: "call-1".to_string(),
+                    output: FunctionCallOutputPayload::from_text(expected_output.to_string()),
+                },
+                ResponseItem::CustomToolCall {
+                    id: None,
+                    status: None,
+                    call_id: "call-2".to_string(),
+                    name: "apply_patch".to_string(),
+                    input: "*** Begin Patch".to_string(),
+                },
+                ResponseItem::CustomToolCallOutput {
+                    call_id: "call-2".to_string(),
+                    output: FunctionCallOutputPayload::from_text(expected_output.to_string()),
+                },
+            ]
+        );
+    }
 }
diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs
index 016642b33..e4b7755ab 100644
--- a/codex-rs/core/src/context_manager/history.rs
+++ b/codex-rs/core/src/context_manager/history.rs
@@ -344,32 +344,21 @@ impl ContextManager {
         let policy_with_serialization_budget = policy * 1.2;
         match item {
             ResponseItem::FunctionCallOutput { call_id, output } => {
-                let body = match &output.body {
-                    FunctionCallOutputBody::Text(content) => FunctionCallOutputBody::Text(
-                        truncate_text(content, policy_with_serialization_budget),
-                    ),
-                    FunctionCallOutputBody::ContentItems(items) => {
-                        FunctionCallOutputBody::ContentItems(
-                            truncate_function_output_items_with_policy(
-                                items,
-                                policy_with_serialization_budget,
-                            ),
-                        )
-                    }
-                };
                 ResponseItem::FunctionCallOutput {
                     call_id: call_id.clone(),
-                    output: FunctionCallOutputPayload {
-                        body,
-                        success: output.success,
-                    },
+                    output: truncate_function_output_payload(
+                        output,
+                        policy_with_serialization_budget,
+                    ),
                 }
             }
             ResponseItem::CustomToolCallOutput { call_id, output } => {
-                let truncated = truncate_text(output, policy_with_serialization_budget);
                 ResponseItem::CustomToolCallOutput {
                     call_id: call_id.clone(),
-                    output: truncated,
+                    output: truncate_function_output_payload(
+                        output,
+                        policy_with_serialization_budget,
+                    ),
                 }
             }
             ResponseItem::Message { .. }
@@ -385,6 +374,25 @@ impl ContextManager {
     }
 }
 
+fn truncate_function_output_payload(
+    output: &FunctionCallOutputPayload,
+    policy: TruncationPolicy,
+) -> FunctionCallOutputPayload {
+    let body = match &output.body {
+        FunctionCallOutputBody::Text(content) => {
+            FunctionCallOutputBody::Text(truncate_text(content, policy))
+        }
+        FunctionCallOutputBody::ContentItems(items) => FunctionCallOutputBody::ContentItems(
+            truncate_function_output_items_with_policy(items, policy),
+        ),
+    };
+
+    FunctionCallOutputPayload {
+        body,
+        success: output.success,
+    }
+}
+
 /// API messages include every non-system item (user/assistant messages, reasoning,
 /// tool calls, tool outputs, shell calls, and web-search calls).
 fn is_api_message(message: &ResponseItem) -> bool {
@@ -508,7 +516,8 @@ fn image_data_url_estimate_adjustment(item: &ResponseItem) -> (i64, i64) {
                 }
             }
         }
-        ResponseItem::FunctionCallOutput { output, .. } => {
+        ResponseItem::FunctionCallOutput { output, .. }
+        | ResponseItem::CustomToolCallOutput { output, .. } => {
             if let FunctionCallOutputBody::ContentItems(items) = &output.body {
                 for content_item in items {
                     if let FunctionCallOutputContentItem::InputImage { image_url } = content_item {
diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs
index 52fff81ed..798abc767 100644
--- a/codex-rs/core/src/context_manager/history_tests.rs
+++ b/codex-rs/core/src/context_manager/history_tests.rs
@@ -67,7 +67,7 @@ fn user_input_text_msg(text: &str) -> ResponseItem {
 fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem {
     ResponseItem::CustomToolCallOutput {
         call_id: call_id.to_string(),
-        output: output.to_string(),
+        output: FunctionCallOutputPayload::from_text(output.to_string()),
     }
 }
 
@@ -279,6 +279,24 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
                 },
             ]),
         },
+        ResponseItem::CustomToolCall {
+            id: None,
+            status: None,
+            call_id: "tool-1".to_string(),
+            name: "js_repl".to_string(),
+            input: "view_image".to_string(),
+        },
+        ResponseItem::CustomToolCallOutput {
+            call_id: "tool-1".to_string(),
+            output: FunctionCallOutputPayload::from_content_items(vec![
+                FunctionCallOutputContentItem::InputText {
+                    text: "js repl result".to_string(),
+                },
+                FunctionCallOutputContentItem::InputImage {
+                    image_url: "https://example.com/js-repl-result.png".to_string(),
+                },
+            ]),
+        },
     ];
     let history = create_history_with_items(items);
     let text_only_modalities = vec![InputModality::Text];
@@ -321,6 +339,25 @@ fn for_prompt_strips_images_when_model_does_not_support_images() {
                 },
             ]),
         },
+        ResponseItem::CustomToolCall {
+            id: None,
+            status: None,
+            call_id: "tool-1".to_string(),
+            name: "js_repl".to_string(),
+            input: "view_image".to_string(),
+        },
+        ResponseItem::CustomToolCallOutput {
+            call_id: "tool-1".to_string(),
+            output: FunctionCallOutputPayload::from_content_items(vec![
+                FunctionCallOutputContentItem::InputText {
+                    text: "js repl result".to_string(),
+                },
+                FunctionCallOutputContentItem::InputText {
+                    text: "image content omitted because you do not support image input"
+                        .to_string(),
+                },
+            ]),
+        },
     ];
     assert_eq!(stripped, expected);
 
@@ -671,7 +708,7 @@ fn remove_first_item_handles_custom_tool_pair() {
         },
         ResponseItem::CustomToolCallOutput {
             call_id: "tool-1".to_string(),
-            output: "ok".to_string(),
+            output: FunctionCallOutputPayload::from_text("ok".to_string()),
         },
     ];
     let mut h = create_history_with_items(items);
@@ -750,7 +787,7 @@ fn record_items_truncates_custom_tool_call_output_content() {
     let long_output = line.repeat(2_500);
     let item = ResponseItem::CustomToolCallOutput {
         call_id: "tool-200".to_string(),
-        output: long_output.clone(),
+        output: FunctionCallOutputPayload::from_text(long_output.clone()),
     };
 
     history.record_items([&item], policy);
@@ -758,7 +795,8 @@ fn record_items_truncates_custom_tool_call_output_content() {
     assert_eq!(history.items.len(), 1);
     match &history.items[0] {
         ResponseItem::CustomToolCallOutput { output, .. } => {
-            assert_ne!(output, &long_output);
+            let output = output.text_content().unwrap_or_default();
+            assert_ne!(output, long_output);
             assert!(
                 output.contains("tokens truncated"),
                 "expected token-based truncation marker, got {output}"
@@ -949,7 +987,7 @@ fn normalize_adds_missing_output_for_custom_tool_call() {
             },
             ResponseItem::CustomToolCallOutput {
                 call_id: "tool-x".to_string(),
-                output: "aborted".to_string(),
+                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
             },
         ]
     );
@@ -1016,7 +1054,7 @@ fn normalize_removes_orphan_function_call_output() {
 fn normalize_removes_orphan_custom_tool_call_output() {
     let items = vec![ResponseItem::CustomToolCallOutput {
         call_id: "orphan-2".to_string(),
-        output: "ok".to_string(),
+        output: FunctionCallOutputPayload::from_text("ok".to_string()),
     }];
     let mut h = create_history_with_items(items);
 
@@ -1089,7 +1127,7 @@ fn normalize_mixed_inserts_and_removals() {
             },
             ResponseItem::CustomToolCallOutput {
                 call_id: "t1".to_string(),
-                output: "aborted".to_string(),
+                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
             },
             ResponseItem::LocalShellCall {
                 id: None,
@@ -1191,7 +1229,7 @@ fn normalize_removes_orphan_function_call_output_panics_in_debug() {
 fn normalize_removes_orphan_custom_tool_call_output_panics_in_debug() {
     let items = vec![ResponseItem::CustomToolCallOutput {
         call_id: "orphan-2".to_string(),
-        output: "ok".to_string(),
+        output: FunctionCallOutputPayload::from_text("ok".to_string()),
     }];
     let mut h = create_history_with_items(items);
     h.normalize_history(&default_input_modalities());
@@ -1294,6 +1332,28 @@ fn image_data_url_payload_does_not_dominate_function_call_output_estimate() {
     assert!(estimated < raw_len);
 }
 
+#[test]
+fn image_data_url_payload_does_not_dominate_custom_tool_call_output_estimate() {
+    let payload = "C".repeat(50_000);
+    let image_url = format!("data:image/png;base64,{payload}");
+    let item = ResponseItem::CustomToolCallOutput {
+        call_id: "call-js-repl".to_string(),
+        output: FunctionCallOutputPayload::from_content_items(vec![
+            FunctionCallOutputContentItem::InputText {
+                text: "Screenshot captured".to_string(),
+            },
+            FunctionCallOutputContentItem::InputImage { image_url },
+        ]),
+    };
+
+    let raw_len = serde_json::to_string(&item).unwrap().len() as i64;
+    let estimated = estimate_response_item_model_visible_bytes(&item);
+    let expected = raw_len - payload.len() as i64 + IMAGE_BYTES_ESTIMATE;
+
+    assert_eq!(estimated, expected);
+    assert!(estimated < raw_len);
+}
+
 #[test]
 fn non_base64_image_urls_are_unchanged() {
     let message_item = ResponseItem::Message {
diff --git a/codex-rs/core/src/context_manager/normalize.rs b/codex-rs/core/src/context_manager/normalize.rs
index a4fe9e64f..572ac51fc 100644
--- a/codex-rs/core/src/context_manager/normalize.rs
+++ b/codex-rs/core/src/context_manager/normalize.rs
@@ -1,7 +1,6 @@
 use std::collections::HashSet;
 
 use codex_protocol::models::ContentItem;
-use codex_protocol::models::FunctionCallOutputBody;
 use codex_protocol::models::FunctionCallOutputContentItem;
 use codex_protocol::models::FunctionCallOutputPayload;
 use codex_protocol::models::ResponseItem;
@@ -35,10 +34,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) {
                         idx,
                         ResponseItem::FunctionCallOutput {
                             call_id: call_id.clone(),
-                            output: FunctionCallOutputPayload {
-                                body: FunctionCallOutputBody::Text("aborted".to_string()),
-                                ..Default::default()
-                            },
+                            output: FunctionCallOutputPayload::from_text("aborted".to_string()),
                         },
                     ));
                 }
@@ -59,7 +55,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) {
                         idx,
                         ResponseItem::CustomToolCallOutput {
                             call_id: call_id.clone(),
-                            output: "aborted".to_string(),
+                            output: FunctionCallOutputPayload::from_text("aborted".to_string()),
                         },
                     ));
                 }
@@ -82,10 +78,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec<ResponseItem>) {
                             idx,
                             ResponseItem::FunctionCallOutput {
                                 call_id: call_id.clone(),
-                                output: FunctionCallOutputPayload {
-                                    body: FunctionCallOutputBody::Text("aborted".to_string()),
-                                    ..Default::default()
-                                },
+                                output: FunctionCallOutputPayload::from_text("aborted".to_string()),
                             },
                         ));
                     }
@@ -245,7 +238,8 @@ pub(crate) fn strip_images_when_unsupported(
                 }
                 *content = normalized_content;
             }
-            ResponseItem::FunctionCallOutput { output, .. } => {
+            ResponseItem::FunctionCallOutput { output, .. }
+            | ResponseItem::CustomToolCallOutput { output, .. } => {
                 if let Some(content_items) = output.content_items_mut() {
                     let mut normalized_content_items = Vec::with_capacity(content_items.len());
                     for content_item in content_items.iter() {
diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs
index 58925622a..0700b4d01 100644
--- a/codex-rs/core/src/tools/context.rs
+++ b/codex-rs/core/src/tools/context.rs
@@ -95,15 +95,12 @@ impl ToolOutput {
         match self {
             ToolOutput::Function { body, success } => {
                 // `custom_tool_call` is the Responses API item type for freeform
-                // tools (`ToolSpec::Freeform`, e.g. freeform `apply_patch`).
-                // Those payloads must round-trip as `custom_tool_call_output`
-                // with plain string output.
+                // tools (`ToolSpec::Freeform`, e.g. freeform `apply_patch` or
+                // `js_repl`).
                 if matches!(payload, ToolPayload::Custom { .. }) {
-                    // Freeform/custom tools (`custom_tool_call`) use the custom
-                    // output wire shape and remain string-only.
                     return ResponseInputItem::CustomToolCallOutput {
                         call_id: call_id.to_string(),
-                        output: body.to_text().unwrap_or_default(),
+                        output: FunctionCallOutputPayload { body, success },
                     };
                 }
 
@@ -183,7 +180,9 @@ mod tests {
         match response {
             ResponseInputItem::CustomToolCallOutput { call_id, output } => {
                 assert_eq!(call_id, "call-42");
-                assert_eq!(output, "patched");
+                assert_eq!(output.text_content(), Some("patched"));
+                assert!(output.content_items().is_none());
+                assert_eq!(output.success, Some(true));
             }
             other => panic!("expected CustomToolCallOutput, got {other:?}"),
         }
@@ -234,8 +233,21 @@ mod tests {
 
         match response {
             ResponseInputItem::CustomToolCallOutput { call_id, output } => {
+                let expected = vec![
+                    FunctionCallOutputContentItem::InputText {
+                        text: "line 1".to_string(),
+                    },
+                    FunctionCallOutputContentItem::InputImage {
+                        image_url: "data:image/png;base64,AAA".to_string(),
+                    },
+                    FunctionCallOutputContentItem::InputText {
+                        text: "line 2".to_string(),
+                    },
+                ];
                 assert_eq!(call_id, "call-99");
-                assert_eq!(output, "line 1\nline 2");
+                assert_eq!(output.content_items(), Some(expected.as_slice()));
+                assert_eq!(output.body.to_text().as_deref(), Some("line 1\nline 2"));
+                assert_eq!(output.success, Some(true));
             }
             other => panic!("expected CustomToolCallOutput, got {other:?}"),
         }
diff --git a/codex-rs/core/src/tools/handlers/js_repl.rs b/codex-rs/core/src/tools/handlers/js_repl.rs
index 4488b4ea5..362d25b81 100644
--- a/codex-rs/core/src/tools/handlers/js_repl.rs
+++ b/codex-rs/core/src/tools/handlers/js_repl.rs
@@ -155,9 +155,13 @@ impl ToolHandler for JsReplHandler {
         };
 
         let content = result.output;
-        let items = vec![FunctionCallOutputContentItem::InputText {
-            text: content.clone(),
-        }];
+        let mut items = Vec::with_capacity(result.content_items.len() + 1);
+        if !content.is_empty() {
+            items.push(FunctionCallOutputContentItem::InputText {
+                text: content.clone(),
+            });
+        }
+        items.extend(result.content_items);
 
         emit_js_repl_exec_end(
             session.as_ref(),
@@ -170,7 +174,11 @@ impl ToolHandler for JsReplHandler {
         .await;
 
         Ok(ToolOutput::Function {
-            body: FunctionCallOutputBody::ContentItems(items),
+            body: if items.is_empty() {
+                FunctionCallOutputBody::Text(content)
+            } else {
+                FunctionCallOutputBody::ContentItems(items)
+            },
             success: Some(true),
         })
     }
diff --git a/codex-rs/core/src/tools/js_repl/mod.rs b/codex-rs/core/src/tools/js_repl/mod.rs
index d2defa9db..a234fd0eb 100644
--- a/codex-rs/core/src/tools/js_repl/mod.rs
+++ b/codex-rs/core/src/tools/js_repl/mod.rs
@@ -104,6 +104,7 @@ pub struct JsReplArgs {
 #[derive(Clone, Debug)]
 pub struct JsExecResult {
     pub output: String,
+    pub content_items: Vec<FunctionCallOutputContentItem>,
 }
 
 struct KernelState {
@@ -125,6 +126,7 @@ struct ExecContext {
 #[derive(Default)]
 struct ExecToolCalls {
     in_flight: usize,
+    content_items: Vec<FunctionCallOutputContentItem>,
     notify: Arc<Notify>,
     cancel: CancellationToken,
 }
@@ -136,6 +138,7 @@ enum JsReplToolCallPayloadKind {
     FunctionText,
     FunctionContentItems,
     CustomText,
+    CustomContentItems,
     McpResult,
     McpErrorResult,
     Error,
@@ -369,6 +372,21 @@ impl JsReplManager {
         Some(state.cancel.clone())
     }
 
+    async fn record_exec_tool_call_content_items(
+        exec_tool_calls: &Arc<Mutex<HashMap<String, ExecToolCalls>>>,
+        exec_id: &str,
+        content_items: Vec<FunctionCallOutputContentItem>,
+    ) {
+        if content_items.is_empty() {
+            return;
+        }
+
+        let mut calls = exec_tool_calls.lock().await;
+        if let Some(state) = calls.get_mut(exec_id) {
+            state.content_items.extend(content_items);
+        }
+    }
+
     async fn finish_exec_tool_call(
         exec_tool_calls: &Arc<Mutex<HashMap<String, ExecToolCalls>>>,
         exec_id: &str,
@@ -592,11 +610,18 @@ impl JsReplManager {
                     output,
                 )
             }
-            ResponseInputItem::CustomToolCallOutput { output, .. } => Self::summarize_text_payload(
-                Some("custom_tool_call_output"),
-                JsReplToolCallPayloadKind::CustomText,
-                output,
-            ),
+            ResponseInputItem::CustomToolCallOutput { output, .. } => {
+                let payload_kind = if output.content_items().is_some() {
+                    JsReplToolCallPayloadKind::CustomContentItems
+                } else {
+                    JsReplToolCallPayloadKind::CustomText
+                };
+                Self::summarize_function_output_payload(
+                    "custom_tool_call_output",
+                    payload_kind,
+                    output,
+                )
+            }
             ResponseInputItem::McpToolCallOutput { result, .. } => match result {
                 Ok(result) => {
                     let output = FunctionCallOutputPayload::from(result);
@@ -769,7 +794,13 @@ impl JsReplManager {
         };
 
         match response {
-            ExecResultMessage::Ok { output } => Ok(JsExecResult { output }),
+            ExecResultMessage::Ok { content_items } => {
+                let (output, content_items) = split_exec_result_content_items(content_items);
+                Ok(JsExecResult {
+                    output,
+                    content_items,
+                })
+            }
             ExecResultMessage::Err { message } => Err(FunctionCallError::RespondToModel(message)),
         }
     }
@@ -1073,10 +1104,22 @@ impl JsReplManager {
                     error,
                 } => {
                     JsReplManager::wait_for_exec_tool_calls_map(&exec_tool_calls, &id).await;
+                    let content_items = {
+                        let calls = exec_tool_calls.lock().await;
+                        calls
+                            .get(&id)
+                            .map(|state| state.content_items.clone())
+                            .unwrap_or_default()
+                    };
                     let mut pending = pending_execs.lock().await;
                     if let Some(tx) = pending.remove(&id) {
                         let payload = if ok {
-                            ExecResultMessage::Ok { output }
+                            ExecResultMessage::Ok {
+                                content_items: build_exec_result_content_items(
+                                    output,
+                                    content_items,
+                                ),
+                            }
                         } else {
                             ExecResultMessage::Err {
                                 message: error
@@ -1133,7 +1176,11 @@ impl JsReplManager {
                                         response: None,
                                         error: Some("js_repl execution reset".to_string()),
                                     },
-                                    result = JsReplManager::run_tool_request(ctx, req) => result,
+                                    result = JsReplManager::run_tool_request(
+                                        ctx,
+                                        req,
+                                        Arc::clone(&exec_tool_calls_for_task),
+                                    ) => result,
                                 }
                             }
                             None => RunToolResult {
@@ -1227,7 +1274,11 @@ impl JsReplManager {
         }
     }
 
-    async fn run_tool_request(exec: ExecContext, req: RunToolRequest) -> RunToolResult {
+    async fn run_tool_request(
+        exec: ExecContext,
+        req: RunToolRequest,
+        exec_tool_calls: Arc<Mutex<HashMap<String, ExecToolCalls>>>,
+    ) -> RunToolResult {
         if is_js_repl_internal_tool(&req.tool_name) {
             let error = "js_repl cannot invoke itself".to_string();
             let summary = Self::summarize_tool_call_error(&error);
@@ -1300,39 +1351,13 @@ impl JsReplManager {
             .await
         {
             Ok(response) => {
-                if let ResponseInputItem::FunctionCallOutput { output, .. } = &response
-                    && let Some(items) = output.content_items()
-                {
-                    let mut has_image = false;
-                    let mut content = Vec::with_capacity(items.len());
-                    for item in items {
-                        match item {
-                            FunctionCallOutputContentItem::InputText { text } => {
-                                content.push(ContentItem::InputText { text: text.clone() });
-                            }
-                            FunctionCallOutputContentItem::InputImage { image_url } => {
-                                has_image = true;
-                                content.push(ContentItem::InputImage {
-                                    image_url: image_url.clone(),
-                                });
-                            }
-                        }
-                    }
-
-                    if has_image
-                        && session
-                            .inject_response_items(vec![ResponseInputItem::Message {
-                                role: "user".to_string(),
-                                content,
-                            }])
-                            .await
-                            .is_err()
-                    {
-                        warn!(
-                            tool_name = %tool_name,
-                            "js_repl tool call returned image content but there was no active turn to attach it to"
-                        );
-                    }
+                if let Some(items) = response_content_items(&response) {
+                    Self::record_exec_tool_call_content_items(
+                        &exec_tool_calls,
+                        &req.exec_id,
+                        items,
+                    )
+                    .await;
                 }
 
                 let summary = Self::summarize_tool_call_response(&response);
@@ -1407,6 +1432,50 @@ impl JsReplManager {
     }
 }
 
+/// Clones the structured content items out of a tool response, if it carries any.
+fn response_content_items(
+    response: &ResponseInputItem,
+) -> Option<Vec<FunctionCallOutputContentItem>> {
+    let converted;
+    let payload = match response {
+        ResponseInputItem::FunctionCallOutput { output, .. }
+        | ResponseInputItem::CustomToolCallOutput { output, .. } => output,
+        ResponseInputItem::McpToolCallOutput { result: Ok(ok), .. } => {
+            converted = FunctionCallOutputPayload::from(ok);
+            &converted
+        }
+        ResponseInputItem::McpToolCallOutput { result: Err(_), .. }
+        | ResponseInputItem::Message { .. } => return None,
+    };
+    payload.content_items().map(<[_]>::to_vec)
+}
+
+/// Prepends the exec's text output as an `input_text` item ahead of `content_items`.
+fn build_exec_result_content_items(
+    output: String,
+    content_items: Vec<FunctionCallOutputContentItem>,
+) -> Vec<FunctionCallOutputContentItem> {
+    let leading_text = FunctionCallOutputContentItem::InputText { text: output };
+    // The text item always comes first, followed by the nested items in their order.
+    std::iter::once(leading_text).chain(content_items).collect()
+}
+
+/// Splits a leading `input_text` item off as the text output; when the list is empty
+/// or starts with an image, the text is empty and the items come back untouched.
+fn split_exec_result_content_items(
+    mut content_items: Vec<FunctionCallOutputContentItem>,
+) -> (String, Vec<FunctionCallOutputContentItem>) {
+    use FunctionCallOutputContentItem::InputText;
+    if !matches!(content_items.first(), Some(InputText { .. })) {
+        return (String::new(), content_items);
+    }
+    // The guard above proved the first item is text, so taking it cannot fail.
+    let InputText { text } = content_items.remove(0) else {
+        unreachable!("first content item should be input_text");
+    };
+    (text, content_items)
+}
+
 fn is_freeform_tool(specs: &[ToolSpec], name: &str) -> bool {
     specs
         .iter()
@@ -1462,8 +1531,12 @@ struct RunToolResult {
 
 #[derive(Debug)]
 enum ExecResultMessage {
-    Ok { output: String },
-    Err { message: String },
+    Ok {
+        content_items: Vec<FunctionCallOutputContentItem>,
+    },
+    Err {
+        message: String,
+    },
 }
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -1601,7 +1674,6 @@ mod tests {
     use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem;
     use codex_protocol::dynamic_tools::DynamicToolResponse;
     use codex_protocol::dynamic_tools::DynamicToolSpec;
-    use codex_protocol::models::ContentItem;
     use codex_protocol::models::FunctionCallOutputContentItem;
     use codex_protocol::models::FunctionCallOutputPayload;
     use codex_protocol::models::ResponseInputItem;
@@ -1850,6 +1922,35 @@ mod tests {
         );
     }
 
+    /// An image-only custom tool output must be summarized as content items (one image,
+    /// no text) rather than through the text-payload fields.
+    #[test]
+    fn summarize_tool_call_response_for_multimodal_custom_output() {
+        let image = FunctionCallOutputContentItem::InputImage {
+            image_url: "data:image/png;base64,abcd".to_string(),
+        };
+        let response = ResponseInputItem::CustomToolCallOutput {
+            call_id: "call-1".to_string(),
+            output: FunctionCallOutputPayload::from_content_items(vec![image]),
+        };
+
+        let expected = JsReplToolCallResponseSummary {
+            response_type: Some("custom_tool_call_output".to_string()),
+            payload_kind: Some(JsReplToolCallPayloadKind::CustomContentItems),
+            payload_text_preview: None,
+            payload_text_length: None,
+            payload_item_count: Some(1),
+            text_item_count: Some(0),
+            image_item_count: Some(1),
+            structured_content_present: None,
+            result_is_error: None,
+        };
+
+        let actual = JsReplManager::summarize_tool_call_response(&response);
+
+        assert_eq!(actual, expected);
+    }
+
     #[test]
     fn summarize_tool_call_error_marks_error_payload() {
         let actual = JsReplManager::summarize_tool_call_error("tool failed");
@@ -2310,20 +2411,22 @@ console.log(out.output?.body?.text ?? "");
             )
             .await?;
         assert!(result.output.contains("function_call_output"));
-
-        let pending_input = session.get_pending_input().await;
-        let [ResponseInputItem::Message { role, content }] = pending_input.as_slice() else {
-            panic!(
-                "view_image should inject exactly one pending input message, got {pending_input:?}"
-            );
-        };
-        assert_eq!(role, "user");
-        let [ContentItem::InputImage { image_url }] = content.as_slice() else {
-            panic!(
-                "view_image should inject exactly one input_image content item, got {content:?}"
-            );
+        assert_eq!(
+            result.content_items.as_slice(),
+            [FunctionCallOutputContentItem::InputImage {
+                image_url:
+                    "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="
+                        .to_string(),
+            }]
+            .as_slice()
+        );
+        let [FunctionCallOutputContentItem::InputImage { image_url }] =
+            result.content_items.as_slice()
+        else {
+            panic!("view_image should return exactly one input_image content item");
         };
         assert!(image_url.starts_with("data:image/png;base64,"));
+        assert!(session.get_pending_input().await.is_empty());
 
         Ok(())
     }
@@ -2404,22 +2507,18 @@ console.log(out.type);
         response_watcher_result?;
         let result = result?;
         assert!(result.output.contains("function_call_output"));
-
-        let pending_input = session.get_pending_input().await;
         assert_eq!(
-            pending_input,
-            vec![ResponseInputItem::Message {
-                role: "user".to_string(),
-                content: vec![
-                    ContentItem::InputText {
-                        text: "inline image note".to_string(),
-                    },
-                    ContentItem::InputImage {
-                        image_url: image_url.to_string(),
-                    },
-                ],
-            }]
+            result.content_items,
+            vec![
+                FunctionCallOutputContentItem::InputText {
+                    text: "inline image note".to_string(),
+                },
+                FunctionCallOutputContentItem::InputImage {
+                    image_url: image_url.to_string(),
+                },
+            ]
         );
+        assert!(session.get_pending_input().await.is_empty());
 
         Ok(())
     }
diff --git a/codex-rs/core/src/tools/parallel.rs b/codex-rs/core/src/tools/parallel.rs
index 04d505c58..5f2fc89e5 100644
--- a/codex-rs/core/src/tools/parallel.rs
+++ b/codex-rs/core/src/tools/parallel.rs
@@ -117,7 +117,10 @@ impl ToolCallRuntime {
         match &call.payload {
             ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
                 call_id: call.call_id.clone(),
-                output: Self::abort_message(call, secs),
+                output: FunctionCallOutputPayload {
+                    body: FunctionCallOutputBody::Text(Self::abort_message(call, secs)),
+                    ..Default::default()
+                },
             },
             ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput {
                 call_id: call.call_id.clone(),
diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs
index 4897b4ea6..a55fb5fd5 100644
--- a/codex-rs/core/src/tools/router.rs
+++ b/codex-rs/core/src/tools/router.rs
@@ -197,7 +197,10 @@ impl ToolRouter {
         if payload_outputs_custom {
             ResponseInputItem::CustomToolCallOutput {
                 call_id,
-                output: message,
+                output: codex_protocol::models::FunctionCallOutputPayload {
+                    body: FunctionCallOutputBody::Text(message),
+                    success: Some(false),
+                },
             }
         } else {
             ResponseInputItem::FunctionCallOutput {
diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs
index 6a073d926..bb6de200a 100644
--- a/codex-rs/core/tests/common/responses.rs
+++ b/codex-rs/core/tests/common/responses.rs
@@ -264,7 +264,7 @@ impl ResponsesRequest {
             .cloned()
             .unwrap_or(Value::Null);
         match output {
-            Value::String(text) => Some((Some(text), None)),
+            Value::String(_) | Value::Array(_) => Some((output_value_to_text(&output), None)),
             Value::Object(obj) => Some((
                 obj.get("content")
                     .and_then(Value::as_str)
@@ -296,6 +296,87 @@ impl ResponsesRequest {
     }
 }
 
+/// Text of a string `output`, or of an array holding exactly one `input_text` item.
+pub(crate) fn output_value_to_text(value: &Value) -> Option<String> {
+    if let Some(text) = value.as_str() {
+        return Some(text.to_string());
+    }
+    let [item] = value.as_array()?.as_slice() else {
+        return None;
+    };
+    let is_input_text = item.get("type").and_then(Value::as_str) == Some("input_text");
+    let text = item.get("text").and_then(Value::as_str)?;
+    is_input_text.then(|| text.to_string())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+    use wiremock::http::HeaderMap;
+    use wiremock::http::Method;
+
+    /// Builds a POST `/v1/responses` request whose JSON body is `{ "input": input }`.
+    fn request_with_input(input: Value) -> ResponsesRequest {
+        let url = "http://localhost/v1/responses"
+            .parse()
+            .expect("valid request url");
+        let json = serde_json::json!({ "input": input });
+        ResponsesRequest(wiremock::Request {
+            url,
+            method: Method::POST,
+            headers: HeaderMap::new(),
+            body: serde_json::to_vec(&json).expect("serialize request body"),
+        })
+    }
+
+    /// Array-valued outputs yield text only when they hold exactly one `input_text`
+    /// item; any other array shape is found but reported without text.
+    #[test]
+    fn call_output_content_and_success_returns_only_single_text_content_item() {
+        // Exactly one `input_text` item per output: both accessors surface its text.
+        let text_only = request_with_input(serde_json::json!([
+            {
+                "type": "function_call_output",
+                "call_id": "call-1",
+                "output": [{ "type": "input_text", "text": "hello" }]
+            },
+            {
+                "type": "custom_tool_call_output",
+                "call_id": "call-2",
+                "output": [{ "type": "input_text", "text": "world" }]
+            }
+        ]));
+
+        let function_text = text_only.function_call_output_content_and_success("call-1");
+        assert_eq!(function_text, Some((Some("hello".to_string()), None)));
+        let custom_text = text_only.custom_tool_call_output_content_and_success("call-2");
+        assert_eq!(custom_text, Some((Some("world".to_string()), None)));
+
+        // Text plus image, or an image alone: the item is found but has no text content.
+        let with_images = request_with_input(serde_json::json!([
+            {
+                "type": "function_call_output",
+                "call_id": "call-3",
+                "output": [
+                    { "type": "input_text", "text": "hello" },
+                    { "type": "input_image", "image_url": "data:image/png;base64,abc" }
+                ]
+            },
+            {
+                "type": "custom_tool_call_output",
+                "call_id": "call-4",
+                "output": [{ "type": "input_image", "image_url": "data:image/png;base64,abc" }]
+            }
+        ]));
+
+        let function_mixed = with_images.function_call_output_content_and_success("call-3");
+        assert_eq!(function_mixed, Some((None, None)));
+        let custom_image = with_images.custom_tool_call_output_content_and_success("call-4");
+        assert_eq!(custom_image, Some((None, None)));
+    }
+}
+
 #[derive(Debug, Clone)]
 pub struct WebSocketRequest {
     body: Value,
diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs
index 4253b4b68..bd15c6d7e 100644
--- a/codex-rs/core/tests/common/test_codex.rs
+++ b/codex-rs/core/tests/common/test_codex.rs
@@ -23,6 +23,7 @@ use wiremock::MockServer;
 
 use crate::load_default_config_for_test;
 use crate::responses::WebSocketTestServer;
+use crate::responses::output_value_to_text;
 use crate::responses::start_mock_server;
 use crate::streaming_sse::StreamingSseServer;
 use crate::wait_for_event;
@@ -394,11 +395,7 @@ impl TestCodexHarness {
 
     pub async fn custom_tool_call_output(&self, call_id: &str) -> String {
         let bodies = self.request_bodies().await;
-        custom_tool_call_output(&bodies, call_id)
-            .get("output")
-            .and_then(Value::as_str)
-            .expect("output string")
-            .to_string()
+        custom_tool_call_output_text(&bodies, call_id)
     }
 
     pub async fn apply_patch_output(
@@ -433,6 +430,14 @@ fn custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value
     panic!("custom_tool_call_output {call_id} not found");
 }
 
+/// Text of the `custom_tool_call_output` for `call_id`; panics if output or text is missing.
+fn custom_tool_call_output_text(bodies: &[Value], call_id: &str) -> String {
+    let output = custom_tool_call_output(bodies, call_id).get("output");
+    let text = output.map(output_value_to_text);
+    let text = text.unwrap_or_else(|| panic!("custom_tool_call_output {call_id} missing output"));
+    text.unwrap_or_else(|| panic!("custom_tool_call_output {call_id} missing text output"))
+}
+
 fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value {
     for body in bodies {
         if let Some(items) = body.get("input").and_then(Value::as_array) {
@@ -456,3 +461,36 @@ pub fn test_codex() -> TestCodexBuilder {
         home: None,
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use pretty_assertions::assert_eq;
+    use serde_json::json;
+
+    /// A string-valued `output` is returned verbatim.
+    #[test]
+    fn custom_tool_call_output_text_returns_output_text() {
+        let request_bodies = [json!({
+            "input": [{
+                "type": "custom_tool_call_output",
+                "call_id": "call-1",
+                "output": "hello"
+            }]
+        })];
+        assert_eq!(custom_tool_call_output_text(&request_bodies, "call-1"), "hello");
+    }
+
+    /// An item without an `output` field panics with the "missing output" message.
+    #[test]
+    #[should_panic(expected = "custom_tool_call_output call-2 missing output")]
+    fn custom_tool_call_output_text_panics_when_output_is_missing() {
+        let request_bodies = [json!({
+            "input": [{
+                "type": "custom_tool_call_output",
+                "call_id": "call-2"
+            }]
+        })];
+        let _ = custom_tool_call_output_text(&request_bodies, "call-2");
+    }
+}
diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs
index 082b41a37..1de0522cb 100644
--- a/codex-rs/core/tests/suite/client.rs
+++ b/codex-rs/core/tests/suite/client.rs
@@ -35,6 +35,10 @@ use codex_protocol::openai_models::ModelsResponse;
 use codex_protocol::openai_models::ReasoningEffort;
 use codex_protocol::protocol::EventMsg;
 use codex_protocol::protocol::Op;
+use codex_protocol::protocol::RolloutItem;
+use codex_protocol::protocol::RolloutLine;
+use codex_protocol::protocol::SessionMeta;
+use codex_protocol::protocol::SessionMetaLine;
 use codex_protocol::protocol::SessionSource;
 use codex_protocol::user_input::UserInput;
 use core_test_support::apps_test_server::AppsTestServer;
@@ -344,6 +348,144 @@ async fn resume_includes_initial_messages_and_sends_prior_items() {
     assert!(pos_environment < pos_new_user);
 }
 
+#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+async fn resume_replays_legacy_js_repl_image_rollout_shapes() {
+    skip_if_no_network!();
+
+    // Early js_repl builds persisted image tool results as two separate rollout items:
+    // a string-valued custom_tool_call_output plus a standalone user input_image message.
+    // Current image tests cover today's shapes; this keeps resume compatibility for that
+    // legacy rollout representation.
+    let legacy_custom_tool_call = ResponseItem::CustomToolCall {
+        id: None,
+        status: None,
+        call_id: "legacy-js-call".to_string(),
+        name: "js_repl".to_string(),
+        input: "console.log('legacy image flow')".to_string(),
+    };
+    let legacy_image_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==";
+    let rollout = vec![
+        RolloutLine {
+            timestamp: "2024-01-01T00:00:00.000Z".to_string(),
+            item: RolloutItem::SessionMeta(SessionMetaLine {
+                meta: SessionMeta {
+                    id: ThreadId::default(),
+                    timestamp: "2024-01-01T00:00:00Z".to_string(),
+                    cwd: ".".into(),
+                    originator: "test_originator".to_string(),
+                    cli_version: "test_version".to_string(),
+                    model_provider: Some("test-provider".to_string()),
+                    ..Default::default()
+                },
+                git: None,
+            }),
+        },
+        RolloutLine {
+            timestamp: "2024-01-01T00:00:01.000Z".to_string(),
+            item: RolloutItem::ResponseItem(legacy_custom_tool_call),
+        },
+        RolloutLine {
+            timestamp: "2024-01-01T00:00:02.000Z".to_string(),
+            item: RolloutItem::ResponseItem(ResponseItem::CustomToolCallOutput {
+                call_id: "legacy-js-call".to_string(),
+                output: FunctionCallOutputPayload::from_text("legacy js_repl stdout".to_string()),
+            }),
+        },
+        RolloutLine {
+            timestamp: "2024-01-01T00:00:03.000Z".to_string(),
+            item: RolloutItem::ResponseItem(ResponseItem::Message {
+                id: None,
+                role: "user".to_string(),
+                content: vec![ContentItem::InputImage {
+                    image_url: legacy_image_url.to_string(),
+                }],
+                end_turn: None,
+                phase: None,
+            }),
+        },
+    ];
+    // Persist the rollout as JSON lines: one serialized `RolloutLine` per line.
+    let tmpdir = TempDir::new().unwrap();
+    let session_path = tmpdir
+        .path()
+        .join("resume-legacy-js-repl-image-rollout.jsonl");
+    let mut f = std::fs::File::create(&session_path).unwrap();
+    for line in rollout {
+        writeln!(f, "{}", serde_json::to_string(&line).unwrap()).unwrap();
+    }
+    // Mount one SSE response (created + completed) for the turn submitted after resuming.
+    let server = MockServer::start().await;
+    let resp_mock = mount_sse_once(
+        &server,
+        sse(vec![ev_response_created("resp1"), ev_completed("resp1")]),
+    )
+    .await;
+
+    let codex_home = Arc::new(TempDir::new().unwrap());
+    let mut builder = test_codex().with_model("gpt-5.1");
+    let test = builder
+        .resume(&server, codex_home, session_path.clone())
+        .await
+        .expect("resume conversation");
+    test.submit_turn("after resume").await.unwrap();
+
+    let input = resp_mock.single_request().input();
+    // The legacy string-valued tool output must be replayed as a plain JSON string.
+    let legacy_output_index = input
+        .iter()
+        .position(|item| {
+            item.get("type").and_then(|value| value.as_str()) == Some("custom_tool_call_output")
+                && item.get("call_id").and_then(|value| value.as_str()) == Some("legacy-js-call")
+        })
+        .expect("legacy custom tool output should be replayed");
+    assert_eq!(
+        input[legacy_output_index]
+            .get("output")
+            .and_then(|value| value.as_str()),
+        Some("legacy js_repl stdout")
+    );
+    // The standalone legacy user image message must be replayed as well.
+    let legacy_image_index = input
+        .iter()
+        .position(|item| {
+            item.get("type").and_then(|value| value.as_str()) == Some("message")
+                && item.get("role").and_then(|value| value.as_str()) == Some("user")
+                && item
+                    .get("content")
+                    .and_then(|value| value.as_array())
+                    .is_some_and(|content| {
+                        content.iter().any(|entry| {
+                            entry.get("type").and_then(|value| value.as_str())
+                                == Some("input_image")
+                                && entry.get("image_url").and_then(|value| value.as_str())
+                                    == Some(legacy_image_url)
+                        })
+                    })
+        })
+        .expect("legacy injected image message should be replayed");
+    // Find the user message carrying the text submitted after the resume.
+    let new_user_index = input
+        .iter()
+        .position(|item| {
+            item.get("type").and_then(|value| value.as_str()) == Some("message")
+                && item.get("role").and_then(|value| value.as_str()) == Some("user")
+                && item
+                    .get("content")
+                    .and_then(|value| value.as_array())
+                    .is_some_and(|content| {
+                        content.iter().any(|entry| {
+                            entry.get("type").and_then(|value| value.as_str()) == Some("input_text")
+                                && entry.get("text").and_then(|value| value.as_str())
+                                    == Some("after resume")
+                        })
+                    })
+        })
+        .expect("new user message should be present");
+    // Both legacy items must appear before the newly submitted user message.
+    assert!(legacy_output_index < new_user_index);
+    assert!(legacy_image_index < new_user_index);
+}
+
 #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
 async fn includes_conversation_id_and_model_headers_in_request() {
     skip_if_no_network!();
@@ -1565,7 +1707,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() {
     });
     prompt.input.push(ResponseItem::CustomToolCallOutput {
         call_id: "custom-tool-call-id".into(),
-        output: "ok".into(),
+        output: FunctionCallOutputPayload::from_text("ok".into()),
     });
 
     let mut stream = client_session
diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs
index c4f8ebec3..43d14e678 100644
--- a/codex-rs/core/tests/suite/view_image.rs
+++ b/codex-rs/core/tests/suite/view_image.rs
@@ -358,40 +358,26 @@ console.log(out.output?.body?.text ?? "");
     .await;
 
     let req = mock.single_request();
-    let (js_repl_output, js_repl_success) = req
-        .custom_tool_call_output_content_and_success(call_id)
-        .expect("custom tool output present");
-    let js_repl_output = js_repl_output.expect("custom tool output text present");
-    assert_ne!(
-        js_repl_success,
-        Some(false),
-        "js_repl call failed unexpectedly: {js_repl_output}"
+    let body = req.body_json();
+    assert_eq!(
+        image_messages(&body).len(),
+        0,
+        "js_repl view_image should not inject a pending input image message"
     );
 
-    let body = req.body_json();
-    let image_messages = image_messages(&body);
-    assert_eq!(
-        image_messages.len(),
-        1,
-        "js_repl view_image should inject exactly one pending input image message"
-    );
-    let image_message = image_messages
-        .into_iter()
-        .next()
-        .expect("pending input image message not included in request");
-    let image_url = image_message
-        .get("content")
+    let custom_output = req.custom_tool_call_output(call_id);
+    let output_items = custom_output
+        .get("output")
         .and_then(Value::as_array)
-        .and_then(|content| {
-            content.iter().find_map(|span| {
-                if span.get("type").and_then(Value::as_str) == Some("input_image") {
-                    span.get("image_url").and_then(Value::as_str)
-                } else {
-                    None
-                }
-            })
+        .expect("custom_tool_call_output should be a content item array");
+    let image_url = output_items
+        .iter()
+        .find_map(|item| {
+            (item.get("type").and_then(Value::as_str) == Some("input_image"))
+                .then(|| item.get("image_url").and_then(Value::as_str))
+                .flatten()
         })
-        .expect("image_url present");
+        .expect("image_url present in js_repl custom tool output");
     assert!(
         image_url.starts_with("data:image/png;base64,"),
         "expected png data URL, got {image_url}"
diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs
index d74401c60..14cb96828 100644
--- a/codex-rs/protocol/src/models.rs
+++ b/codex-rs/protocol/src/models.rs
@@ -161,7 +161,7 @@ pub enum ResponseInputItem {
     },
     CustomToolCallOutput {
         call_id: String,
-        output: String,
+        output: FunctionCallOutputPayload,
     },
 }
 
@@ -261,9 +261,12 @@ pub enum ResponseItem {
         name: String,
         input: String,
     },
+    // `custom_tool_call_output.output` uses the same wire encoding as
+    // `function_call_output.output` so freeform tools can return either plain
+    // text or structured content items.
     CustomToolCallOutput {
         call_id: String,
-        output: String,
+        output: FunctionCallOutputPayload,
     },
     // Emitted by the Responses API when the agent triggers a web search.
     // Example payload (from SSE `response.output_item.done`):
@@ -1538,6 +1541,26 @@ mod tests {
         Ok(())
     }
 
+    /// An image-only custom tool output must serialize its `output` as a JSON array.
+    #[test]
+    fn serializes_custom_tool_image_outputs_as_array() -> Result<()> {
+        let image = FunctionCallOutputContentItem::InputImage {
+            image_url: "data:image/png;base64,BASE64".into(),
+        };
+        let item = ResponseInputItem::CustomToolCallOutput {
+            call_id: "call1".into(),
+            output: FunctionCallOutputPayload::from_content_items(vec![image]),
+        };
+
+        // Round-trip through the JSON text so the check sees the serialized shape.
+        let encoded = serde_json::to_string(&item)?;
+        let decoded: serde_json::Value = serde_json::from_str(&encoded)?;
+        let output = decoded.get("output").expect("output field");
+        assert!(output.is_array(), "expected array output");
+
+        Ok(())
+    }
+
     #[test]
     fn preserves_existing_image_data_urls() -> Result<()> {
         let call_tool_result = CallToolResult {