Return image content from view_image (#12553)

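The Responses API supports image content in function call outputs, so view_image now returns the image as content items in its tool output instead of injecting a separate user message.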
This commit is contained in:
pakrym-oai 2026-02-22 23:00:08 -08:00 committed by GitHub
parent e8949f4507
commit 335a4e1cbc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 55 additions and 69 deletions

View file

@ -1,5 +1,6 @@
use async_trait::async_trait;
use codex_protocol::models::FunctionCallOutputBody;
use codex_protocol::models::FunctionCallOutputContentItem;
use codex_protocol::openai_models::InputModality;
use serde::Deserialize;
use tokio::fs;
@ -14,7 +15,6 @@ use crate::tools::handlers::parse_arguments;
use crate::tools::registry::ToolHandler;
use crate::tools::registry::ToolKind;
use codex_protocol::models::ContentItem;
use codex_protocol::models::ResponseInputItem;
use codex_protocol::models::local_image_content_items_with_label_number;
pub struct ViewImageHandler;
@ -81,21 +81,20 @@ impl ToolHandler for ViewImageHandler {
}
let event_path = abs_path.clone();
let content: Vec<ContentItem> =
local_image_content_items_with_label_number(&abs_path, None);
let input = ResponseInputItem::Message {
role: "user".to_string(),
content,
};
session
.inject_response_items(vec![input])
.await
.map_err(|_| {
FunctionCallError::RespondToModel(
"unable to attach image (no active task)".to_string(),
)
})?;
let content = local_image_content_items_with_label_number(&abs_path, None)
.into_iter()
.map(|item| match item {
ContentItem::InputText { text } => {
FunctionCallOutputContentItem::InputText { text }
}
ContentItem::InputImage { image_url } => {
FunctionCallOutputContentItem::InputImage { image_url }
}
ContentItem::OutputText { text } => {
FunctionCallOutputContentItem::InputText { text }
}
})
.collect();
session
.send_event(
@ -108,7 +107,7 @@ impl ToolHandler for ViewImageHandler {
.await;
Ok(ToolOutput::Function {
body: FunctionCallOutputBody::Text("attached local image path".to_string()),
body: FunctionCallOutputBody::ContentItems(content),
success: Some(true),
})
}

View file

@ -237,40 +237,29 @@ async fn view_image_tool_attaches_local_image() -> anyhow::Result<()> {
let req = mock.single_request();
let body = req.body_json();
let output_text = req
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");
assert!(
find_image_message(&body).is_none(),
"view_image tool should not inject a separate image message"
);
let image_message =
find_image_message(&body).expect("pending input image message not included in request");
let content_items = image_message
.get("content")
let function_output = req.function_call_output(call_id);
let output_items = function_output
.get("output")
.and_then(Value::as_array)
.expect("image message has content array");
.expect("function_call_output should be a content item array");
assert_eq!(
content_items.len(),
output_items.len(),
1,
"view_image should inject only the image content item (no tag/label text)"
"view_image should return only the image content item (no tag/label text)"
);
assert_eq!(
content_items[0].get("type").and_then(Value::as_str),
output_items[0].get("type").and_then(Value::as_str),
Some("input_image"),
"view_image should inject only an input_image content item"
"view_image should return only an input_image content item"
);
let image_url = image_message
.get("content")
.and_then(Value::as_array)
.and_then(|content| {
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_image") {
span.get("image_url").and_then(Value::as_str)
} else {
None
}
})
})
let image_url = output_items[0]
.get("image_url")
.and_then(Value::as_str)
.expect("image_url present");
let (prefix, encoded) = image_url
@ -535,38 +524,36 @@ async fn view_image_tool_placeholder_for_non_image_files() -> anyhow::Result<()>
request.inputs_of_type("input_image").is_empty(),
"non-image file should not produce an input_image message"
);
let function_output = request.function_call_output(call_id);
let output_items = function_output
.get("output")
.and_then(Value::as_array)
.expect("function_call_output should be a content item array");
assert_eq!(
output_items.len(),
1,
"non-image placeholder should be returned as a single content item"
);
assert_eq!(
output_items[0].get("type").and_then(Value::as_str),
Some("input_text"),
"non-image placeholder should be returned as input_text"
);
let placeholder = output_items[0]
.get("text")
.and_then(Value::as_str)
.expect("placeholder text present");
let placeholder = request
.inputs_of_type("message")
.iter()
.find_map(|item| {
let content = item.get("content").and_then(Value::as_array)?;
content.iter().find_map(|span| {
if span.get("type").and_then(Value::as_str) == Some("input_text") {
let text = span.get("text").and_then(Value::as_str)?;
if text.contains("Codex could not read the local image at")
&& text.contains("unsupported MIME type `application/json`")
{
return Some(text.to_string());
}
}
None
})
})
.expect("placeholder text found");
assert!(
placeholder.contains("Codex could not read the local image at")
&& placeholder.contains("unsupported MIME type `application/json`"),
"placeholder should describe the unsupported file type: {placeholder}"
);
assert!(
placeholder.contains(&abs_path.display().to_string()),
"placeholder should mention path: {placeholder}"
);
let output_text = mock
.single_request()
.function_call_output_content_and_success(call_id)
.and_then(|(content, _)| content)
.expect("output text present");
assert_eq!(output_text, "attached local image path");
Ok(())
}