From 335a4e1cbceb96c280e070729b8759af045b6211 Mon Sep 17 00:00:00 2001 From: pakrym-oai Date: Sun, 22 Feb 2026 23:00:08 -0800 Subject: [PATCH] Return image content from view_image (#12553) Responses API supports image content --- .../core/src/tools/handlers/view_image.rs | 33 ++++--- codex-rs/core/tests/suite/view_image.rs | 91 ++++++++----------- 2 files changed, 55 insertions(+), 69 deletions(-) diff --git a/codex-rs/core/src/tools/handlers/view_image.rs b/codex-rs/core/src/tools/handlers/view_image.rs index cfcd5ec2a..6337cef3e 100644 --- a/codex-rs/core/src/tools/handlers/view_image.rs +++ b/codex-rs/core/src/tools/handlers/view_image.rs @@ -1,5 +1,6 @@ use async_trait::async_trait; use codex_protocol::models::FunctionCallOutputBody; +use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::openai_models::InputModality; use serde::Deserialize; use tokio::fs; @@ -14,7 +15,6 @@ use crate::tools::handlers::parse_arguments; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; use codex_protocol::models::ContentItem; -use codex_protocol::models::ResponseInputItem; use codex_protocol::models::local_image_content_items_with_label_number; pub struct ViewImageHandler; @@ -81,21 +81,20 @@ impl ToolHandler for ViewImageHandler { } let event_path = abs_path.clone(); - let content: Vec = - local_image_content_items_with_label_number(&abs_path, None); - let input = ResponseInputItem::Message { - role: "user".to_string(), - content, - }; - - session - .inject_response_items(vec![input]) - .await - .map_err(|_| { - FunctionCallError::RespondToModel( - "unable to attach image (no active task)".to_string(), - ) - })?; + let content = local_image_content_items_with_label_number(&abs_path, None) + .into_iter() + .map(|item| match item { + ContentItem::InputText { text } => { + FunctionCallOutputContentItem::InputText { text } + } + ContentItem::InputImage { image_url } => { + FunctionCallOutputContentItem::InputImage { image_url } + } + ContentItem::OutputText { text } => { + FunctionCallOutputContentItem::InputText { text } + } + }) + .collect(); session .send_event( @@ -108,7 +107,7 @@ impl ToolHandler for ViewImageHandler { .await; Ok(ToolOutput::Function { - body: FunctionCallOutputBody::Text("attached local image path".to_string()), + body: FunctionCallOutputBody::ContentItems(content), success: Some(true), }) } diff --git a/codex-rs/core/tests/suite/view_image.rs b/codex-rs/core/tests/suite/view_image.rs index 96315e5c7..5d2c8e3d5 100644 --- a/codex-rs/core/tests/suite/view_image.rs +++ b/codex-rs/core/tests/suite/view_image.rs @@ -237,40 +237,29 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> { let req = mock.single_request(); let body = req.body_json(); - let output_text = req - .function_call_output_content_and_success(call_id) - .and_then(|(content, _)| content) - .expect("output text present"); - assert_eq!(output_text, "attached local image path"); + assert!( + find_image_message(&body).is_none(), + "view_image tool should not inject a separate image message" + ); - let image_message = - find_image_message(&body).expect("pending input image message not included in request"); - let content_items = image_message - .get("content") + let function_output = req.function_call_output(call_id); + let output_items = function_output + .get("output") .and_then(Value::as_array) - .expect("image message has content array"); + .expect("function_call_output should be a content item array"); assert_eq!( - content_items.len(), + output_items.len(), 1, - "view_image should inject only the image content item (no tag/label text)" + "view_image should return only the image content item (no tag/label text)" ); assert_eq!( - content_items[0].get("type").and_then(Value::as_str), + output_items[0].get("type").and_then(Value::as_str), Some("input_image"), - "view_image should inject only an input_image content item" + "view_image should return only an input_image content item" ); - let image_url = image_message - .get("content") - .and_then(Value::as_array) - .and_then(|content| { - content.iter().find_map(|span| { - if span.get("type").and_then(Value::as_str) == Some("input_image") { - span.get("image_url").and_then(Value::as_str) - } else { - None - } - }) - }) + let image_url = output_items[0] + .get("image_url") + .and_then(Value::as_str) .expect("image_url present"); let (prefix, encoded) = image_url @@ -535,38 +524,36 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()> request.inputs_of_type("input_image").is_empty(), "non-image file should not produce an input_image message" ); + let function_output = request.function_call_output(call_id); + let output_items = function_output + .get("output") + .and_then(Value::as_array) + .expect("function_call_output should be a content item array"); + assert_eq!( + output_items.len(), + 1, + "non-image placeholder should be returned as a single content item" + ); + assert_eq!( + output_items[0].get("type").and_then(Value::as_str), + Some("input_text"), + "non-image placeholder should be returned as input_text" + ); + let placeholder = output_items[0] + .get("text") + .and_then(Value::as_str) + .expect("placeholder text present"); - let placeholder = request - .inputs_of_type("message") - .iter() - .find_map(|item| { - let content = item.get("content").and_then(Value::as_array)?; - content.iter().find_map(|span| { - if span.get("type").and_then(Value::as_str) == Some("input_text") { - let text = span.get("text").and_then(Value::as_str)?; - if text.contains("Codex could not read the local image at") - && text.contains("unsupported MIME type `application/json`") - { - return Some(text.to_string()); - } - } - None - }) - }) - .expect("placeholder text found"); - + assert!( + placeholder.contains("Codex could not read the local image at") + && placeholder.contains("unsupported MIME type `application/json`"), + "placeholder should describe the unsupported file type: {placeholder}" + ); assert!( placeholder.contains(&abs_path.display().to_string()), "placeholder should mention path: {placeholder}" ); - let output_text = mock - .single_request() - .function_call_output_content_and_success(call_id) - .and_then(|(content, _)| content) - .expect("output text present"); - assert_eq!(output_text, "attached local image path"); - Ok(()) }