From 7e980d7db665ebfe365c6aeb546405d59d91ff32 Mon Sep 17 00:00:00 2001 From: Curtis 'Fjord' Hawthorne Date: Thu, 26 Feb 2026 18:17:46 -0800 Subject: [PATCH] Support multimodal custom tool outputs (#12948) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary This changes `custom_tool_call_output` to use the same output payload shape as `function_call_output`, so freeform tools can return either plain text or structured content items. The main goal is to let `js_repl` return image content from nested `view_image` calls in its own `custom_tool_call_output`, instead of relying on a separate injected message. ## What changed - Changed `custom_tool_call_output.output` from `string` to `FunctionCallOutputPayload` - Updated freeform tool plumbing to preserve structured output bodies - Updated `js_repl` to aggregate nested tool content items and attach them to the outer `js_repl` result - Removed the old `js_repl` special case that injected `view_image` results as a separate pending user image message - Updated normalization/history/truncation paths to handle multimodal `custom_tool_call_output` - Regenerated app-server protocol schema artifacts ## Behavior Direct `view_image` calls still return a `function_call_output` with image content. When `view_image` is called inside `js_repl`, the outer `js_repl` `custom_tool_call_output` now carries: - an `input_text` item if the JS produced text output - one or more `input_image` items from nested tool results So the nested image result now stays inside the `js_repl` tool output instead of being injected as a separate message. ## Compatibility This is intended to be backward-compatible for resumed conversations. Older histories that stored `custom_tool_call_output.output` as a plain string still deserialize correctly, and older histories that used the previous injected-image-message flow also continue to resume. Added regression coverage for resuming a pre-change rollout containing: - string-valued `custom_tool_call_output` - legacy injected image message history #### [git stack](https://github.com/magus/git-stack-cli) - 👉 `1` https://github.com/openai/codex/pull/12948 --- .../schema/json/ClientRequest.json | 2 +- .../schema/json/EventMsg.json | 2 +- .../codex_app_server_protocol.schemas.json | 2 +- .../RawResponseItemCompletedNotification.json | 2 +- .../schema/json/v2/ThreadResumeParams.json | 2 +- .../schema/typescript/ResponseItem.ts | 2 +- codex-rs/core/src/client_common.rs | 69 ++++- codex-rs/core/src/context_manager/history.rs | 49 ++-- .../core/src/context_manager/history_tests.rs | 76 +++++- .../core/src/context_manager/normalize.rs | 16 +- codex-rs/core/src/tools/context.rs | 28 +- codex-rs/core/src/tools/handlers/js_repl.rs | 16 +- codex-rs/core/src/tools/js_repl/mod.rs | 241 ++++++++++++------ codex-rs/core/src/tools/parallel.rs | 5 +- codex-rs/core/src/tools/router.rs | 5 +- codex-rs/core/tests/common/responses.rs | 83 +++++- codex-rs/core/tests/common/test_codex.rs | 48 +++- codex-rs/core/tests/suite/client.rs | 144 ++++++++++- codex-rs/core/tests/suite/view_image.rs | 46 ++-- codex-rs/protocol/src/models.rs | 27 +- 20 files changed, 688 insertions(+), 177 deletions(-) diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index 5ab197c84..03e42bed4 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -1340,7 +1340,7 @@ "type": "string" }, "output": { - "type": "string" + "$ref": "#/definitions/FunctionCallOutputPayload" }, "type": { "enum": [ diff --git a/codex-rs/app-server-protocol/schema/json/EventMsg.json b/codex-rs/app-server-protocol/schema/json/EventMsg.json index c7bf90874..9f442ece9 100644 --- a/codex-rs/app-server-protocol/schema/json/EventMsg.json +++ b/codex-rs/app-server-protocol/schema/json/EventMsg.json @@ -4822,7 +4822,7 @@ "type": "string" }, "output": { - "type": "string" + "$ref": "#/definitions/FunctionCallOutputPayload" }, "type": { "enum": [ diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 28a802bb3..1391a97ec 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -11421,7 +11421,7 @@ "type": "string" }, "output": { - "type": "string" + "$ref": "#/definitions/v2/FunctionCallOutputPayload" }, "type": { "enum": [ diff --git a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json index 748eeaab4..4717ff266 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json @@ -565,7 +565,7 @@ "type": "string" }, "output": { - "type": "string" + "$ref": "#/definitions/FunctionCallOutputPayload" }, "type": { "enum": [ diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json index 29d6fbc6d..ef7607d3c 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json @@ -615,7 +615,7 @@ "type": "string" }, "output": { - "type": "string" + "$ref": "#/definitions/FunctionCallOutputPayload" }, "type": { "enum": [ diff --git a/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts b/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts index 611c7fb22..dd7621f01 100644 --- a/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts +++ b/codex-rs/app-server-protocol/schema/typescript/ResponseItem.ts @@ -15,4 +15,4 @@ export type ResponseItem = { "type": "message", role: string, content: Array { - if shell_call_ids.remove(call_id) - && let Some(structured) = parse_structured_shell_output(output) - { - *output = structured - } - } ResponseItem::FunctionCall { name, call_id, .. } if is_shell_tool_name(name) || name == "apply_patch" => { shell_call_ids.insert(call_id.clone()); } - ResponseItem::FunctionCallOutput { call_id, output } => { + ResponseItem::FunctionCallOutput { call_id, output } + | ResponseItem::CustomToolCallOutput { call_id, output } => { if shell_call_ids.remove(call_id) && let Some(structured) = output .text_content() @@ -240,6 +234,7 @@ mod tests { use codex_api::common::OpenAiVerbosity; use codex_api::common::TextControls; use codex_api::create_text_param_for_request; + use codex_protocol::models::FunctionCallOutputPayload; use pretty_assertions::assert_eq; use super::*; @@ -343,4 +338,62 @@ mod tests { let v = serde_json::to_value(&req).expect("json"); assert!(v.get("text").is_none()); } + + #[test] + fn reserializes_shell_outputs_for_function_and_custom_tool_calls() { + let raw_output = r#"{"output":"hello","metadata":{"exit_code":0,"duration_seconds":0.5}}"#; + let expected_output = "Exit code: 0\nWall time: 0.5 seconds\nOutput:\nhello"; + let mut items = vec![ + ResponseItem::FunctionCall { + id: None, + name: "shell".to_string(), + arguments: "{}".to_string(), + call_id: "call-1".to_string(), + }, + ResponseItem::FunctionCallOutput { + call_id: "call-1".to_string(), + output: FunctionCallOutputPayload::from_text(raw_output.to_string()), + }, + ResponseItem::CustomToolCall { + id: None, + status: None, + call_id: "call-2".to_string(), + name: "apply_patch".to_string(), + input: "*** Begin Patch".to_string(), + }, + ResponseItem::CustomToolCallOutput { + call_id: "call-2".to_string(), + output: FunctionCallOutputPayload::from_text(raw_output.to_string()), + }, + ]; + + reserialize_shell_outputs(&mut items); + + assert_eq!( + items, + vec![ + ResponseItem::FunctionCall { + id: None, + name: "shell".to_string(), + arguments: "{}".to_string(), + call_id: "call-1".to_string(), + }, + ResponseItem::FunctionCallOutput { + call_id: "call-1".to_string(), + output: FunctionCallOutputPayload::from_text(expected_output.to_string()), + }, + ResponseItem::CustomToolCall { + id: None, + status: None, + call_id: "call-2".to_string(), + name: "apply_patch".to_string(), + input: "*** Begin Patch".to_string(), + }, + ResponseItem::CustomToolCallOutput { + call_id: "call-2".to_string(), + output: FunctionCallOutputPayload::from_text(expected_output.to_string()), + }, + ] + ); + } } diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index 016642b33..e4b7755ab 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -344,32 +344,21 @@ impl ContextManager { let policy_with_serialization_budget = policy * 1.2; match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let body = match &output.body { - FunctionCallOutputBody::Text(content) => FunctionCallOutputBody::Text( - truncate_text(content, policy_with_serialization_budget), - ), - FunctionCallOutputBody::ContentItems(items) => { - FunctionCallOutputBody::ContentItems( - truncate_function_output_items_with_policy( - items, - policy_with_serialization_budget, - ), - ) - } - }; ResponseItem::FunctionCallOutput { call_id: call_id.clone(), - output: FunctionCallOutputPayload { - body, - success: output.success, - }, + output: truncate_function_output_payload( + output, + policy_with_serialization_budget, + ), } } ResponseItem::CustomToolCallOutput { call_id, output } => { - let truncated = truncate_text(output, policy_with_serialization_budget); ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), - output: truncated, + output: truncate_function_output_payload( + output, + policy_with_serialization_budget, + ), } } ResponseItem::Message { .. } @@ -385,6 +374,25 @@ impl ContextManager { } } +fn truncate_function_output_payload( + output: &FunctionCallOutputPayload, + policy: TruncationPolicy, +) -> FunctionCallOutputPayload { + let body = match &output.body { + FunctionCallOutputBody::Text(content) => { + FunctionCallOutputBody::Text(truncate_text(content, policy)) + } + FunctionCallOutputBody::ContentItems(items) => FunctionCallOutputBody::ContentItems( + truncate_function_output_items_with_policy(items, policy), + ), + }; + + FunctionCallOutputPayload { + body, + success: output.success, + } +} + /// API messages include every non-system item (user/assistant messages, reasoning, /// tool calls, tool outputs, shell calls, and web-search calls). fn is_api_message(message: &ResponseItem) -> bool { @@ -508,7 +516,8 @@ fn image_data_url_estimate_adjustment(item: &ResponseItem) -> (i64, i64) { } } } - ResponseItem::FunctionCallOutput { output, .. } => { + ResponseItem::FunctionCallOutput { output, .. } + | ResponseItem::CustomToolCallOutput { output, .. } => { if let FunctionCallOutputBody::ContentItems(items) = &output.body { for content_item in items { if let FunctionCallOutputContentItem::InputImage { image_url } = content_item { diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index 52fff81ed..798abc767 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -67,7 +67,7 @@ fn user_input_text_msg(text: &str) -> ResponseItem { fn custom_tool_call_output(call_id: &str, output: &str) -> ResponseItem { ResponseItem::CustomToolCallOutput { call_id: call_id.to_string(), - output: output.to_string(), + output: FunctionCallOutputPayload::from_text(output.to_string()), } } @@ -279,6 +279,24 @@ fn for_prompt_strips_images_when_model_does_not_support_images() { }, ]), }, + ResponseItem::CustomToolCall { + id: None, + status: None, + call_id: "tool-1".to_string(), + name: "js_repl".to_string(), + input: "view_image".to_string(), + }, + ResponseItem::CustomToolCallOutput { + call_id: "tool-1".to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::InputText { + text: "js repl result".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "https://example.com/js-repl-result.png".to_string(), + }, + ]), + }, ]; let history = create_history_with_items(items); let text_only_modalities = vec![InputModality::Text]; @@ -321,6 +339,25 @@ fn for_prompt_strips_images_when_model_does_not_support_images() { }, ]), }, + ResponseItem::CustomToolCall { + id: None, + status: None, + call_id: "tool-1".to_string(), + name: "js_repl".to_string(), + input: "view_image".to_string(), + }, + ResponseItem::CustomToolCallOutput { + call_id: "tool-1".to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::InputText { + text: "js repl result".to_string(), + }, + FunctionCallOutputContentItem::InputText { + text: "image content omitted because you do not support image input" + .to_string(), + }, + ]), + }, ]; assert_eq!(stripped, expected); @@ -671,7 +708,7 @@ fn remove_first_item_handles_custom_tool_pair() { }, ResponseItem::CustomToolCallOutput { call_id: "tool-1".to_string(), - output: "ok".to_string(), + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ]; let mut h = create_history_with_items(items); @@ -750,7 +787,7 @@ fn record_items_truncates_custom_tool_call_output_content() { let long_output = line.repeat(2_500); let item = ResponseItem::CustomToolCallOutput { call_id: "tool-200".to_string(), - output: long_output.clone(), + output: FunctionCallOutputPayload::from_text(long_output.clone()), }; history.record_items([&item], policy); @@ -758,7 +795,8 @@ fn record_items_truncates_custom_tool_call_output_content() { assert_eq!(history.items.len(), 1); match &history.items[0] { ResponseItem::CustomToolCallOutput { output, .. } => { - assert_ne!(output, &long_output); + let output = output.text_content().unwrap_or_default(); + assert_ne!(output, long_output); assert!( output.contains("tokens truncated"), "expected token-based truncation marker, got {output}" @@ -949,7 +987,7 @@ fn normalize_adds_missing_output_for_custom_tool_call() { }, ResponseItem::CustomToolCallOutput { call_id: "tool-x".to_string(), - output: "aborted".to_string(), + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -1016,7 +1054,7 @@ fn normalize_removes_orphan_function_call_output() { fn normalize_removes_orphan_custom_tool_call_output() { let items = vec![ResponseItem::CustomToolCallOutput { call_id: "orphan-2".to_string(), - output: "ok".to_string(), + output: FunctionCallOutputPayload::from_text("ok".to_string()), }]; let mut h = create_history_with_items(items); @@ -1089,7 +1127,7 @@ fn normalize_mixed_inserts_and_removals() { }, ResponseItem::CustomToolCallOutput { call_id: "t1".to_string(), - output: "aborted".to_string(), + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ResponseItem::LocalShellCall { id: None, @@ -1191,7 +1229,7 @@ fn normalize_removes_orphan_function_call_output_panics_in_debug() { fn normalize_removes_orphan_custom_tool_call_output_panics_in_debug() { let items = vec![ResponseItem::CustomToolCallOutput { call_id: "orphan-2".to_string(), - output: "ok".to_string(), + output: FunctionCallOutputPayload::from_text("ok".to_string()), }]; let mut h = create_history_with_items(items); h.normalize_history(&default_input_modalities()); @@ -1294,6 +1332,28 @@ fn image_data_url_payload_does_not_dominate_function_call_output_estimate() { assert!(estimated < raw_len); } +#[test] +fn image_data_url_payload_does_not_dominate_custom_tool_call_output_estimate() { + let payload = "C".repeat(50_000); + let image_url = format!("data:image/png;base64,{payload}"); + let item = ResponseItem::CustomToolCallOutput { + call_id: "call-js-repl".to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::InputText { + text: "Screenshot captured".to_string(), + }, + FunctionCallOutputContentItem::InputImage { image_url }, + ]), + }; + + let raw_len = serde_json::to_string(&item).unwrap().len() as i64; + let estimated = estimate_response_item_model_visible_bytes(&item); + let expected = raw_len - payload.len() as i64 + IMAGE_BYTES_ESTIMATE; + + assert_eq!(estimated, expected); + assert!(estimated < raw_len); +} + #[test] fn non_base64_image_urls_are_unchanged() { let message_item = ResponseItem::Message { diff --git a/codex-rs/core/src/context_manager/normalize.rs b/codex-rs/core/src/context_manager/normalize.rs index a4fe9e64f..572ac51fc 100644 --- a/codex-rs/core/src/context_manager/normalize.rs +++ b/codex-rs/core/src/context_manager/normalize.rs @@ -1,7 +1,6 @@ use std::collections::HashSet; use codex_protocol::models::ContentItem; -use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; @@ -35,10 +34,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec) { idx, ResponseItem::FunctionCallOutput { call_id: call_id.clone(), - output: FunctionCallOutputPayload { - body: FunctionCallOutputBody::Text("aborted".to_string()), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, )); } @@ -59,7 +55,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec) { idx, ResponseItem::CustomToolCallOutput { call_id: call_id.clone(), - output: "aborted".to_string(), + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, )); } @@ -82,10 +78,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec) { idx, ResponseItem::FunctionCallOutput { call_id: call_id.clone(), - output: FunctionCallOutputPayload { - body: FunctionCallOutputBody::Text("aborted".to_string()), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, )); } @@ -245,7 +238,8 @@ pub(crate) fn strip_images_when_unsupported( } *content = normalized_content; } - ResponseItem::FunctionCallOutput { output, .. } => { + ResponseItem::FunctionCallOutput { output, .. } + | ResponseItem::CustomToolCallOutput { output, .. } => { if let Some(content_items) = output.content_items_mut() { let mut normalized_content_items = Vec::with_capacity(content_items.len()); for content_item in content_items.iter() { diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index 58925622a..0700b4d01 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -95,15 +95,12 @@ impl ToolOutput { match self { ToolOutput::Function { body, success } => { // `custom_tool_call` is the Responses API item type for freeform - // tools (`ToolSpec::Freeform`, e.g. freeform `apply_patch`). - // Those payloads must round-trip as `custom_tool_call_output` - // with plain string output. + // tools (`ToolSpec::Freeform`, e.g. freeform `apply_patch` or + // `js_repl`). if matches!(payload, ToolPayload::Custom { .. }) { - // Freeform/custom tools (`custom_tool_call`) use the custom - // output wire shape and remain string-only. return ResponseInputItem::CustomToolCallOutput { call_id: call_id.to_string(), - output: body.to_text().unwrap_or_default(), + output: FunctionCallOutputPayload { body, success }, }; } @@ -183,7 +180,9 @@ mod tests { match response { ResponseInputItem::CustomToolCallOutput { call_id, output } => { assert_eq!(call_id, "call-42"); - assert_eq!(output, "patched"); + assert_eq!(output.text_content(), Some("patched")); + assert!(output.content_items().is_none()); + assert_eq!(output.success, Some(true)); } other => panic!("expected CustomToolCallOutput, got {other:?}"), } @@ -234,8 +233,21 @@ mod tests { match response { ResponseInputItem::CustomToolCallOutput { call_id, output } => { + let expected = vec![ + FunctionCallOutputContentItem::InputText { + text: "line 1".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + FunctionCallOutputContentItem::InputText { + text: "line 2".to_string(), + }, + ]; assert_eq!(call_id, "call-99"); - assert_eq!(output, "line 1\nline 2"); + assert_eq!(output.content_items(), Some(expected.as_slice())); + assert_eq!(output.body.to_text().as_deref(), Some("line 1\nline 2")); + assert_eq!(output.success, Some(true)); } other => panic!("expected CustomToolCallOutput, got {other:?}"), } diff --git a/codex-rs/core/src/tools/handlers/js_repl.rs b/codex-rs/core/src/tools/handlers/js_repl.rs index 4488b4ea5..362d25b81 100644 --- a/codex-rs/core/src/tools/handlers/js_repl.rs +++ b/codex-rs/core/src/tools/handlers/js_repl.rs @@ -155,9 +155,13 @@ impl ToolHandler for JsReplHandler { }; let content = result.output; - let items = vec![FunctionCallOutputContentItem::InputText { - text: content.clone(), - }]; + let mut items = Vec::with_capacity(result.content_items.len() + 1); + if !content.is_empty() { + items.push(FunctionCallOutputContentItem::InputText { + text: content.clone(), + }); + } + items.extend(result.content_items); emit_js_repl_exec_end( session.as_ref(), @@ -170,7 +174,11 @@ impl ToolHandler for JsReplHandler { .await; Ok(ToolOutput::Function { - body: FunctionCallOutputBody::ContentItems(items), + body: if items.is_empty() { + FunctionCallOutputBody::Text(content) + } else { + FunctionCallOutputBody::ContentItems(items) + }, success: Some(true), }) } diff --git a/codex-rs/core/src/tools/js_repl/mod.rs b/codex-rs/core/src/tools/js_repl/mod.rs index d2defa9db..a234fd0eb 100644 --- a/codex-rs/core/src/tools/js_repl/mod.rs +++ b/codex-rs/core/src/tools/js_repl/mod.rs @@ -104,6 +104,7 @@ pub struct JsReplArgs { #[derive(Clone, Debug)] pub struct JsExecResult { pub output: String, + pub content_items: Vec, } struct KernelState { @@ -125,6 +126,7 @@ struct ExecContext { #[derive(Default)] struct ExecToolCalls { in_flight: usize, + content_items: Vec, notify: Arc, cancel: CancellationToken, } @@ -136,6 +138,7 @@ enum JsReplToolCallPayloadKind { FunctionText, FunctionContentItems, CustomText, + CustomContentItems, McpResult, McpErrorResult, Error, @@ -369,6 +372,21 @@ impl JsReplManager { Some(state.cancel.clone()) } + async fn record_exec_tool_call_content_items( + exec_tool_calls: &Arc>>, + exec_id: &str, + content_items: Vec, + ) { + if content_items.is_empty() { + return; + } + + let mut calls = exec_tool_calls.lock().await; + if let Some(state) = calls.get_mut(exec_id) { + state.content_items.extend(content_items); + } + } + async fn finish_exec_tool_call( exec_tool_calls: &Arc>>, exec_id: &str, @@ -592,11 +610,18 @@ impl JsReplManager { output, ) } - ResponseInputItem::CustomToolCallOutput { output, .. } => Self::summarize_text_payload( - Some("custom_tool_call_output"), - JsReplToolCallPayloadKind::CustomText, - output, - ), + ResponseInputItem::CustomToolCallOutput { output, .. } => { + let payload_kind = if output.content_items().is_some() { + JsReplToolCallPayloadKind::CustomContentItems + } else { + JsReplToolCallPayloadKind::CustomText + }; + Self::summarize_function_output_payload( + "custom_tool_call_output", + payload_kind, + output, + ) + } ResponseInputItem::McpToolCallOutput { result, .. } => match result { Ok(result) => { let output = FunctionCallOutputPayload::from(result); @@ -769,7 +794,13 @@ impl JsReplManager { }; match response { - ExecResultMessage::Ok { output } => Ok(JsExecResult { output }), + ExecResultMessage::Ok { content_items } => { + let (output, content_items) = split_exec_result_content_items(content_items); + Ok(JsExecResult { + output, + content_items, + }) + } ExecResultMessage::Err { message } => Err(FunctionCallError::RespondToModel(message)), } } @@ -1073,10 +1104,22 @@ impl JsReplManager { error, } => { JsReplManager::wait_for_exec_tool_calls_map(&exec_tool_calls, &id).await; + let content_items = { + let calls = exec_tool_calls.lock().await; + calls + .get(&id) + .map(|state| state.content_items.clone()) + .unwrap_or_default() + }; let mut pending = pending_execs.lock().await; if let Some(tx) = pending.remove(&id) { let payload = if ok { - ExecResultMessage::Ok { output } + ExecResultMessage::Ok { + content_items: build_exec_result_content_items( + output, + content_items, + ), + } } else { ExecResultMessage::Err { message: error @@ -1133,7 +1176,11 @@ impl JsReplManager { response: None, error: Some("js_repl execution reset".to_string()), }, - result = JsReplManager::run_tool_request(ctx, req) => result, + result = JsReplManager::run_tool_request( + ctx, + req, + Arc::clone(&exec_tool_calls_for_task), + ) => result, } } None => RunToolResult { @@ -1227,7 +1274,11 @@ impl JsReplManager { } } - async fn run_tool_request(exec: ExecContext, req: RunToolRequest) -> RunToolResult { + async fn run_tool_request( + exec: ExecContext, + req: RunToolRequest, + exec_tool_calls: Arc>>, + ) -> RunToolResult { if is_js_repl_internal_tool(&req.tool_name) { let error = "js_repl cannot invoke itself".to_string(); let summary = Self::summarize_tool_call_error(&error); @@ -1300,39 +1351,13 @@ impl JsReplManager { .await { Ok(response) => { - if let ResponseInputItem::FunctionCallOutput { output, .. } = &response - && let Some(items) = output.content_items() - { - let mut has_image = false; - let mut content = Vec::with_capacity(items.len()); - for item in items { - match item { - FunctionCallOutputContentItem::InputText { text } => { - content.push(ContentItem::InputText { text: text.clone() }); - } - FunctionCallOutputContentItem::InputImage { image_url } => { - has_image = true; - content.push(ContentItem::InputImage { - image_url: image_url.clone(), - }); - } - } - } - - if has_image - && session - .inject_response_items(vec![ResponseInputItem::Message { - role: "user".to_string(), - content, - }]) - .await - .is_err() - { - warn!( - tool_name = %tool_name, - "js_repl tool call returned image content but there was no active turn to attach it to" - ); - } + if let Some(items) = response_content_items(&response) { + Self::record_exec_tool_call_content_items( + &exec_tool_calls, + &req.exec_id, + items, + ) + .await; } let summary = Self::summarize_tool_call_response(&response); @@ -1407,6 +1432,50 @@ impl JsReplManager { } } +fn response_content_items( + response: &ResponseInputItem, +) -> Option> { + match response { + ResponseInputItem::FunctionCallOutput { output, .. } + | ResponseInputItem::CustomToolCallOutput { output, .. } => output + .content_items() + .map(<[FunctionCallOutputContentItem]>::to_vec), + ResponseInputItem::McpToolCallOutput { result, .. } => match result { + Ok(result) => FunctionCallOutputPayload::from(result) + .content_items() + .map(<[FunctionCallOutputContentItem]>::to_vec), + Err(_) => None, + }, + ResponseInputItem::Message { .. } => None, + } +} + +fn build_exec_result_content_items( + output: String, + content_items: Vec, +) -> Vec { + let mut all_content_items = Vec::with_capacity(content_items.len() + 1); + all_content_items.push(FunctionCallOutputContentItem::InputText { text: output }); + all_content_items.extend(content_items); + all_content_items +} + +fn split_exec_result_content_items( + mut content_items: Vec, +) -> (String, Vec) { + match content_items.first() { + Some(FunctionCallOutputContentItem::InputText { .. }) => { + let FunctionCallOutputContentItem::InputText { text } = content_items.remove(0) else { + unreachable!("first content item should be input_text"); + }; + (text, content_items) + } + Some(FunctionCallOutputContentItem::InputImage { .. }) | None => { + (String::new(), content_items) + } + } +} + fn is_freeform_tool(specs: &[ToolSpec], name: &str) -> bool { specs .iter() @@ -1462,8 +1531,12 @@ struct RunToolResult { #[derive(Debug)] enum ExecResultMessage { - Ok { output: String }, - Err { message: String }, + Ok { + content_items: Vec, + }, + Err { + message: String, + }, } #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -1601,7 +1674,6 @@ mod tests { use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolResponse; use codex_protocol::dynamic_tools::DynamicToolSpec; - use codex_protocol::models::ContentItem; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; @@ -1850,6 +1922,35 @@ mod tests { ); } + #[test] + fn summarize_tool_call_response_for_multimodal_custom_output() { + let response = ResponseInputItem::CustomToolCallOutput { + call_id: "call-1".to_string(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,abcd".to_string(), + }, + ]), + }; + + let actual = JsReplManager::summarize_tool_call_response(&response); + + assert_eq!( + actual, + JsReplToolCallResponseSummary { + response_type: Some("custom_tool_call_output".to_string()), + payload_kind: Some(JsReplToolCallPayloadKind::CustomContentItems), + payload_text_preview: None, + payload_text_length: None, + payload_item_count: Some(1), + text_item_count: Some(0), + image_item_count: Some(1), + structured_content_present: None, + result_is_error: None, + } + ); + } + #[test] fn summarize_tool_call_error_marks_error_payload() { let actual = JsReplManager::summarize_tool_call_error("tool failed"); @@ -2310,20 +2411,22 @@ console.log(out.output?.body?.text ?? ""); ) .await?; assert!(result.output.contains("function_call_output")); - - let pending_input = session.get_pending_input().await; - let [ResponseInputItem::Message { role, content }] = pending_input.as_slice() else { - panic!( - "view_image should inject exactly one pending input message, got {pending_input:?}" - ); - }; - assert_eq!(role, "user"); - let [ContentItem::InputImage { image_url }] = content.as_slice() else { - panic!( - "view_image should inject exactly one input_image content item, got {content:?}" - ); + assert_eq!( + result.content_items.as_slice(), + [FunctionCallOutputContentItem::InputImage { + image_url: + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg==" + .to_string(), + }] + .as_slice() + ); + let [FunctionCallOutputContentItem::InputImage { image_url }] = + result.content_items.as_slice() + else { + panic!("view_image should return exactly one input_image content item"); }; assert!(image_url.starts_with("data:image/png;base64,")); + assert!(session.get_pending_input().await.is_empty()); Ok(()) } @@ -2404,22 +2507,18 @@ console.log(out.type); response_watcher_result?; let result = result?; assert!(result.output.contains("function_call_output")); - - let pending_input = session.get_pending_input().await; assert_eq!( - pending_input, - vec![ResponseInputItem::Message { - role: "user".to_string(), - content: vec![ - ContentItem::InputText { - text: "inline image note".to_string(), - }, - ContentItem::InputImage { - image_url: image_url.to_string(), - }, - ], - }] + result.content_items, + vec![ + FunctionCallOutputContentItem::InputText { + text: "inline image note".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: image_url.to_string(), + }, + ] ); + assert!(session.get_pending_input().await.is_empty()); Ok(()) } diff --git a/codex-rs/core/src/tools/parallel.rs b/codex-rs/core/src/tools/parallel.rs index 04d505c58..5f2fc89e5 100644 --- a/codex-rs/core/src/tools/parallel.rs +++ b/codex-rs/core/src/tools/parallel.rs @@ -117,7 +117,10 @@ impl ToolCallRuntime { match &call.payload { ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput { call_id: call.call_id.clone(), - output: Self::abort_message(call, secs), + output: FunctionCallOutputPayload { + body: FunctionCallOutputBody::Text(Self::abort_message(call, secs)), + ..Default::default() + }, }, ToolPayload::Mcp { .. } => ResponseInputItem::McpToolCallOutput { call_id: call.call_id.clone(), diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs index 4897b4ea6..a55fb5fd5 100644 --- a/codex-rs/core/src/tools/router.rs +++ b/codex-rs/core/src/tools/router.rs @@ -197,7 +197,10 @@ impl ToolRouter { if payload_outputs_custom { ResponseInputItem::CustomToolCallOutput { call_id, - output: message, + output: codex_protocol::models::FunctionCallOutputPayload { + body: FunctionCallOutputBody::Text(message), + success: Some(false), + }, } } else { ResponseInputItem::FunctionCallOutput { diff --git a/codex-rs/core/tests/common/responses.rs b/codex-rs/core/tests/common/responses.rs index 6a073d926..bb6de200a 100644 --- a/codex-rs/core/tests/common/responses.rs +++ b/codex-rs/core/tests/common/responses.rs @@ -264,7 +264,7 @@ impl ResponsesRequest { .cloned() .unwrap_or(Value::Null); match output { - Value::String(text) => Some((Some(text), None)), + Value::String(_) | Value::Array(_) => Some((output_value_to_text(&output), None)), Value::Object(obj) => Some(( obj.get("content") .and_then(Value::as_str) @@ -296,6 +296,87 @@ impl ResponsesRequest { } } +pub(crate) fn output_value_to_text(value: &Value) -> Option { + match value { + Value::String(text) => Some(text.clone()), + Value::Array(items) => match items.as_slice() { + [item] if item.get("type").and_then(Value::as_str) == Some("input_text") => { + item.get("text").and_then(Value::as_str).map(str::to_string) + } + [_] | [] | [_, _, ..] => None, + }, + Value::Object(_) | Value::Number(_) | Value::Bool(_) | Value::Null => None, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + use wiremock::http::HeaderMap; + use wiremock::http::Method; + + fn request_with_input(input: Value) -> ResponsesRequest { + ResponsesRequest(wiremock::Request { + url: "http://localhost/v1/responses" + .parse() + .expect("valid request url"), + method: Method::POST, + headers: HeaderMap::new(), + body: serde_json::to_vec(&serde_json::json!({ "input": input })) + .expect("serialize request body"), + }) + } + + #[test] + fn call_output_content_and_success_returns_only_single_text_content_item() { + let single_text = request_with_input(serde_json::json!([ + { + "type": "function_call_output", + "call_id": "call-1", + "output": [{ "type": "input_text", "text": "hello" }] + }, + { + "type": "custom_tool_call_output", + "call_id": "call-2", + "output": [{ "type": "input_text", "text": "world" }] + } + ])); + assert_eq!( + single_text.function_call_output_content_and_success("call-1"), + Some((Some("hello".to_string()), None)) + ); + assert_eq!( + single_text.custom_tool_call_output_content_and_success("call-2"), + Some((Some("world".to_string()), None)) + ); + + let mixed_content = request_with_input(serde_json::json!([ + { + "type": "function_call_output", + "call_id": "call-3", + "output": [ + { "type": "input_text", "text": "hello" }, + { "type": "input_image", "image_url": "data:image/png;base64,abc" } + ] + }, + { + "type": "custom_tool_call_output", + "call_id": "call-4", + "output": [{ "type": "input_image", "image_url": "data:image/png;base64,abc" }] + } + ])); + assert_eq!( + mixed_content.function_call_output_content_and_success("call-3"), + Some((None, None)) + ); + assert_eq!( + mixed_content.custom_tool_call_output_content_and_success("call-4"), + Some((None, None)) + ); + } +} + #[derive(Debug, Clone)] pub struct WebSocketRequest { body: Value, diff --git a/codex-rs/core/tests/common/test_codex.rs b/codex-rs/core/tests/common/test_codex.rs index 4253b4b68..bd15c6d7e 100644 --- a/codex-rs/core/tests/common/test_codex.rs +++ b/codex-rs/core/tests/common/test_codex.rs @@ -23,6 +23,7 @@ use wiremock::MockServer; use crate::load_default_config_for_test; use crate::responses::WebSocketTestServer; +use crate::responses::output_value_to_text; use crate::responses::start_mock_server; use crate::streaming_sse::StreamingSseServer; use crate::wait_for_event; @@ -394,11 +395,7 @@ impl TestCodexHarness { pub async fn custom_tool_call_output(&self, call_id: &str) -> String { let bodies = self.request_bodies().await; - custom_tool_call_output(&bodies, call_id) - .get("output") - .and_then(Value::as_str) - .expect("output string") - .to_string() + custom_tool_call_output_text(&bodies, call_id) } pub async fn apply_patch_output( @@ -433,6 +430,14 @@ fn custom_tool_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value panic!("custom_tool_call_output {call_id} not found"); } +fn custom_tool_call_output_text(bodies: &[Value], call_id: &str) -> String { + let output = custom_tool_call_output(bodies, call_id) + .get("output") + .unwrap_or_else(|| panic!("custom_tool_call_output {call_id} missing output")); + output_value_to_text(output) + .unwrap_or_else(|| panic!("custom_tool_call_output {call_id} missing text output")) +} + fn function_call_output<'a>(bodies: &'a [Value], call_id: &str) -> &'a Value { for body in bodies { if let Some(items) = body.get("input").and_then(Value::as_array) { @@ -456,3 +461,36 @@ pub fn test_codex() -> TestCodexBuilder { home: None, } } + +#[cfg(test)] +mod tests { + use super::*; + use pretty_assertions::assert_eq; + use serde_json::json; + + #[test] + fn custom_tool_call_output_text_returns_output_text() { + let bodies = vec![json!({ + "input": [{ + "type": "custom_tool_call_output", + "call_id": "call-1", + "output": "hello" + }] + })]; + + assert_eq!(custom_tool_call_output_text(&bodies, "call-1"), "hello"); + } + + #[test] + #[should_panic(expected = "custom_tool_call_output call-2 missing output")] + fn custom_tool_call_output_text_panics_when_output_is_missing() { + let bodies = vec![json!({ + "input": [{ + "type": "custom_tool_call_output", + "call_id": "call-2" + }] + })]; + + let _ = custom_tool_call_output_text(&bodies, "call-2"); + } +} diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 082b41a37..1de0522cb 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -35,6 +35,10 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::protocol::EventMsg; use codex_protocol::protocol::Op; +use codex_protocol::protocol::RolloutItem; +use codex_protocol::protocol::RolloutLine; +use codex_protocol::protocol::SessionMeta; +use codex_protocol::protocol::SessionMetaLine; use codex_protocol::protocol::SessionSource; use codex_protocol::user_input::UserInput; use core_test_support::apps_test_server::AppsTestServer; @@ -344,6 +348,144 @@ async fn resume_includes_initial_messages_and_sends_prior_items() { assert!(pos_environment < pos_new_user); } +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn resume_replays_legacy_js_repl_image_rollout_shapes() { + skip_if_no_network!(); + + // Early js_repl builds persisted image tool results as two separate rollout items: + // a string-valued custom_tool_call_output plus a standalone user input_image message. + // Current image tests cover today's shapes; this keeps resume compatibility for that + // legacy rollout representation. + let legacy_custom_tool_call = ResponseItem::CustomToolCall { + id: None, + status: None, + call_id: "legacy-js-call".to_string(), + name: "js_repl".to_string(), + input: "console.log('legacy image flow')".to_string(), + }; + let legacy_image_url = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR4nGP4z8DwHwAFAAH/iZk9HQAAAABJRU5ErkJggg=="; + let rollout = vec![ + RolloutLine { + timestamp: "2024-01-01T00:00:00.000Z".to_string(), + item: RolloutItem::SessionMeta(SessionMetaLine { + meta: SessionMeta { + id: ThreadId::default(), + timestamp: "2024-01-01T00:00:00Z".to_string(), + cwd: ".".into(), + originator: "test_originator".to_string(), + cli_version: "test_version".to_string(), + model_provider: Some("test-provider".to_string()), + ..Default::default() + }, + git: None, + }), + }, + RolloutLine { + timestamp: "2024-01-01T00:00:01.000Z".to_string(), + item: RolloutItem::ResponseItem(legacy_custom_tool_call), + }, + RolloutLine { + timestamp: "2024-01-01T00:00:02.000Z".to_string(), + item: RolloutItem::ResponseItem(ResponseItem::CustomToolCallOutput { + call_id: "legacy-js-call".to_string(), + output: FunctionCallOutputPayload::from_text("legacy js_repl stdout".to_string()), + }), + }, + RolloutLine { + timestamp: "2024-01-01T00:00:03.000Z".to_string(), + item: RolloutItem::ResponseItem(ResponseItem::Message { + id: None, + role: "user".to_string(), + content: vec![ContentItem::InputImage { + image_url: legacy_image_url.to_string(), + }], + end_turn: None, + phase: None, + }), + }, + ]; + + let tmpdir = TempDir::new().unwrap(); + let session_path = tmpdir + .path() + .join("resume-legacy-js-repl-image-rollout.jsonl"); + let mut f = std::fs::File::create(&session_path).unwrap(); + for line in rollout { + writeln!(f, "{}", serde_json::to_string(&line).unwrap()).unwrap(); + } + + let server = MockServer::start().await; + let resp_mock = mount_sse_once( + &server, + sse(vec![ev_response_created("resp1"), ev_completed("resp1")]), + ) + .await; + + let codex_home = Arc::new(TempDir::new().unwrap()); + let mut builder = test_codex().with_model("gpt-5.1"); + let test = builder + .resume(&server, codex_home, session_path.clone()) + .await + .expect("resume conversation"); + test.submit_turn("after resume").await.unwrap(); + + let input = resp_mock.single_request().input(); + + let legacy_output_index = input + .iter() + .position(|item| { + item.get("type").and_then(|value| value.as_str()) == Some("custom_tool_call_output") + && item.get("call_id").and_then(|value| value.as_str()) == Some("legacy-js-call") + }) + .expect("legacy custom tool output should be replayed"); + assert_eq!( + input[legacy_output_index] + .get("output") + .and_then(|value| value.as_str()), + Some("legacy js_repl stdout") + ); + + let legacy_image_index = input + .iter() + .position(|item| { + item.get("type").and_then(|value| value.as_str()) == Some("message") + && item.get("role").and_then(|value| value.as_str()) == Some("user") + && item + .get("content") + .and_then(|value| value.as_array()) + .is_some_and(|content| { + content.iter().any(|entry| { + entry.get("type").and_then(|value| value.as_str()) + == Some("input_image") + && entry.get("image_url").and_then(|value| value.as_str()) + == Some(legacy_image_url) + }) + }) + }) + .expect("legacy injected image message should be replayed"); + + let new_user_index = input + .iter() + .position(|item| { + item.get("type").and_then(|value| value.as_str()) == Some("message") + && item.get("role").and_then(|value| value.as_str()) == Some("user") + && item + .get("content") + .and_then(|value| value.as_array()) + .is_some_and(|content| { + content.iter().any(|entry| { + entry.get("type").and_then(|value| value.as_str()) == Some("input_text") + && entry.get("text").and_then(|value| value.as_str()) + == Some("after resume") + }) + }) + }) + .expect("new user message should be present"); + + assert!(legacy_output_index < new_user_index); + assert!(legacy_image_index < new_user_index); +} + #[tokio::test(flavor = "multi_thread", worker_threads = 2)] async fn includes_conversation_id_and_model_headers_in_request() { skip_if_no_network!(); @@ -1565,7 +1707,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() { }); prompt.input.push(ResponseItem::CustomToolCallOutput { call_id: "custom-tool-call-id".into(), - output: "ok".into(), + output: FunctionCallOutputPayload::from_text("ok".into()), }); let mut stream = client_session diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index c4f8ebec3..43d14e678 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -358,40 +358,26 @@ console.log(out.output?.body?.text ?? ""); .await; let req = mock.single_request(); - let (js_repl_output, js_repl_success) = req - .custom_tool_call_output_content_and_success(call_id) - .expect("custom tool output present"); - let js_repl_output = js_repl_output.expect("custom tool output text present"); - assert_ne!( - js_repl_success, - Some(false), - "js_repl call failed unexpectedly: {js_repl_output}" + let body = req.body_json(); + assert_eq!( + image_messages(&body).len(), + 0, + "js_repl view_image should not inject a pending input image message" ); - let body = req.body_json(); - let image_messages = image_messages(&body); - assert_eq!( - image_messages.len(), - 1, - "js_repl view_image should inject exactly one pending input image message" - ); - let image_message = image_messages - .into_iter() - .next() - .expect("pending input image message not included in request"); - let image_url = image_message - .get("content") + let custom_output = req.custom_tool_call_output(call_id); + let output_items = custom_output + .get("output") .and_then(Value::as_array) - .and_then(|content| { - content.iter().find_map(|span| { - if span.get("type").and_then(Value::as_str) == Some("input_image") { - span.get("image_url").and_then(Value::as_str) - } else { - None - } - }) + .expect("custom_tool_call_output should be a content item array"); + let image_url = output_items + .iter() + .find_map(|item| { + (item.get("type").and_then(Value::as_str) == Some("input_image")) + .then(|| item.get("image_url").and_then(Value::as_str)) + .flatten() }) - .expect("image_url present"); + .expect("image_url present in js_repl custom tool output"); assert!( image_url.starts_with("data:image/png;base64,"), "expected png data URL, got {image_url}" diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index d74401c60..14cb96828 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -161,7 +161,7 @@ pub enum ResponseInputItem { }, CustomToolCallOutput { call_id: String, - output: String, + output: FunctionCallOutputPayload, }, } @@ -261,9 +261,12 @@ pub enum ResponseItem { name: String, input: String, }, + // `custom_tool_call_output.output` uses the same wire encoding as + // `function_call_output.output` so freeform tools can return either plain + // text or structured content items. CustomToolCallOutput { call_id: String, - output: String, + output: FunctionCallOutputPayload, }, // Emitted by the Responses API when the agent triggers a web search. // Example payload (from SSE `response.output_item.done`): @@ -1538,6 +1541,26 @@ mod tests { Ok(()) } + #[test] + fn serializes_custom_tool_image_outputs_as_array() -> Result<()> { + let item = ResponseInputItem::CustomToolCallOutput { + call_id: "call1".into(), + output: FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,BASE64".into(), + }, + ]), + }; + + let json = serde_json::to_string(&item)?; + let v: serde_json::Value = serde_json::from_str(&json)?; + + let output = v.get("output").expect("output field"); + assert!(output.is_array(), "expected array output"); + + Ok(()) + } + #[test] fn preserves_existing_image_data_urls() -> Result<()> { let call_tool_result = CallToolResult {