diff --git a/codex-rs/app-server-protocol/schema/json/ClientRequest.json b/codex-rs/app-server-protocol/schema/json/ClientRequest.json index 4a40cc9ce..54041b40f 100644 --- a/codex-rs/app-server-protocol/schema/json/ClientRequest.json +++ b/codex-rs/app-server-protocol/schema/json/ClientRequest.json @@ -480,6 +480,19 @@ }, "type": "object" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -526,19 +539,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. 
`success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -548,7 +552,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json b/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json index 662d3bda4..e0e29641d 100644 --- a/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json +++ b/codex-rs/app-server-protocol/schema/json/DynamicToolCallResponse.json @@ -1,15 +1,64 @@ { "$schema": "http://json-schema.org/draft-07/schema#", + "definitions": { + "DynamicToolCallOutputContentItem": { + "oneOf": [ + { + "properties": { + "text": { + "type": "string" + }, + "type": { + "enum": [ + "inputText" + ], + "title": "InputTextDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "text", + "type" + ], + "title": "InputTextDynamicToolCallOutputContentItem", + "type": "object" + }, + { + "properties": { + "imageUrl": { + "type": "string" + }, + "type": { + "enum": [ + "inputImage" + ], + "title": "InputImageDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "imageUrl", + "type" + ], + "title": "InputImageDynamicToolCallOutputContentItem", + "type": "object" + } + ] + } + }, "properties": { - "output": { - "type": "string" + "contentItems": { + "items": { + "$ref": "#/definitions/DynamicToolCallOutputContentItem" + }, + "type": "array" }, "success": { "type": "boolean" } }, "required": [ - "output", + "contentItems", "success" ], "title": "DynamicToolCallResponse", diff --git a/codex-rs/app-server-protocol/schema/json/EventMsg.json b/codex-rs/app-server-protocol/schema/json/EventMsg.json index ab556a77c..f399912e0 
100644 --- a/codex-rs/app-server-protocol/schema/json/EventMsg.json +++ b/codex-rs/app-server-protocol/schema/json/EventMsg.json @@ -2864,6 +2864,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2910,19 +2923,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. 
`success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2932,7 +2936,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/ServerNotification.json b/codex-rs/app-server-protocol/schema/json/ServerNotification.json index f6d10c11c..0e2f94f4d 100644 --- a/codex-rs/app-server-protocol/schema/json/ServerNotification.json +++ b/codex-rs/app-server-protocol/schema/json/ServerNotification.json @@ -3484,6 +3484,19 @@ ], "type": "object" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -3530,19 +3543,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. 
`success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -3552,7 +3556,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 87220712e..9b22ff220 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -2312,6 +2312,50 @@ ], "type": "object" }, + "DynamicToolCallOutputContentItem": { + "oneOf": [ + { + "properties": { + "text": { + "type": "string" + }, + "type": { + "enum": [ + "inputText" + ], + "title": "InputTextDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "text", + "type" + ], + "title": "InputTextDynamicToolCallOutputContentItem", + "type": "object" + }, + { + "properties": { + "imageUrl": { + "type": "string" + }, + "type": { + "enum": [ + "inputImage" + ], + "title": "InputImageDynamicToolCallOutputContentItemType", + "type": "string" + } + }, + "required": [ + "imageUrl", + "type" + ], + "title": "InputImageDynamicToolCallOutputContentItem", + "type": "object" + } + ] + }, "DynamicToolCallParams": { "$schema": "http://json-schema.org/draft-07/schema#", "properties": { @@ -2342,15 +2386,18 @@ "DynamicToolCallResponse": { "$schema": "http://json-schema.org/draft-07/schema#", "properties": { - "output": { - "type": "string" + "contentItems": { + "items": { + "$ref": "#/definitions/DynamicToolCallOutputContentItem" + }, + "type": "array" }, "success": { "type": "boolean" } }, "required": [ - "output", + "contentItems", "success" 
], "title": "DynamicToolCallResponse", @@ -5044,6 +5091,19 @@ "title": "ForkConversationResponse", "type": "object" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -5090,19 +5150,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -5112,7 +5163,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, @@ -11246,6 +11297,19 @@ ], "type": "string" }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/v2/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -11292,19 +11356,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/v2/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/v2/FunctionCallOutputBody" }, "success": { "type": [ @@ -11314,7 +11369,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json b/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json index 215c80b28..a5838e89e 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v1/ForkConversationResponse.json @@ -2864,6 +2864,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2910,19 +2923,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2932,7 +2936,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json index 1261306e6..9ce52963d 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json +++ b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationParams.json @@ -98,6 +98,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -144,19 +157,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -166,7 +170,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json index bd6d80b6e..718b17aa2 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v1/ResumeConversationResponse.json @@ -2864,6 +2864,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2910,19 +2923,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2932,7 +2936,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json b/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json index a61d4141a..a85b78281 100644 --- a/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v1/SessionConfiguredNotification.json @@ -2864,6 +2864,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -2910,19 +2923,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -2932,7 +2936,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json index 1b307c9b8..c1e36ad8e 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json +++ b/codex-rs/app-server-protocol/schema/json/v2/RawResponseItemCompletedNotification.json @@ -65,6 +65,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -111,19 +124,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -133,7 +137,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json index 8167ea779..ed1d5274e 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ThreadResumeParams.json @@ -74,6 +74,19 @@ } ] }, + "FunctionCallOutputBody": { + "anyOf": [ + { + "type": "string" + }, + { + "items": { + "$ref": "#/definitions/FunctionCallOutputContentItem" + }, + "type": "array" + } + ] + }, "FunctionCallOutputContentItem": { "description": "Responses API compatible content items that can be returned by a tool call. 
This is a subset of ContentItem with the types we support as function call outputs.", "oneOf": [ @@ -120,19 +133,10 @@ ] }, "FunctionCallOutputPayload": { - "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`content` preserves the historical plain-string payload so downstream integrations (tests, logging, etc.) can keep treating tool output as `String`. When an MCP server returns richer data we additionally populate `content_items` with the structured form that the Responses API understands.", + "description": "The payload we send back to OpenAI when reporting a tool call result.\n\n`body` serializes directly as the wire value for `function_call_output.output`. `success` remains internal metadata for downstream handling.", "properties": { - "content": { - "type": "string" - }, - "content_items": { - "items": { - "$ref": "#/definitions/FunctionCallOutputContentItem" - }, - "type": [ - "array", - "null" - ] + "body": { + "$ref": "#/definitions/FunctionCallOutputBody" }, "success": { "type": [ @@ -142,7 +146,7 @@ } }, "required": [ - "content" + "body" ], "type": "object" }, diff --git a/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputBody.ts b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputBody.ts new file mode 100644 index 000000000..6bcb7e25d --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputBody.ts @@ -0,0 +1,6 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. 
+import type { FunctionCallOutputContentItem } from "./FunctionCallOutputContentItem"; + +export type FunctionCallOutputBody = string | Array<FunctionCallOutputContentItem>; diff --git a/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts index 94370f582..6376c5b8e 100644 --- a/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts +++ b/codex-rs/app-server-protocol/schema/typescript/FunctionCallOutputPayload.ts @@ -1,14 +1,12 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. -import type { FunctionCallOutputContentItem } from "./FunctionCallOutputContentItem"; +import type { FunctionCallOutputBody } from "./FunctionCallOutputBody"; /** * The payload we send back to OpenAI when reporting a tool call result. * - * `content` preserves the historical plain-string payload so downstream - * integrations (tests, logging, etc.) can keep treating tool output as - * `String`. When an MCP server returns richer data we additionally populate - * `content_items` with the structured form that the Responses API understands. + * `body` serializes directly as the wire value for `function_call_output.output`. + * `success` remains internal metadata for downstream handling.
*/ -export type FunctionCallOutputPayload = { content: string, content_items: Array<FunctionCallOutputContentItem> | null, success: boolean | null, }; +export type FunctionCallOutputPayload = { body: FunctionCallOutputBody, success: boolean | null, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/index.ts b/codex-rs/app-server-protocol/schema/typescript/index.ts index 7d3ecb818..a6ff6fbaf 100644 --- a/codex-rs/app-server-protocol/schema/typescript/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/index.ts @@ -69,6 +69,7 @@ export type { FileChange } from "./FileChange"; export type { ForcedLoginMethod } from "./ForcedLoginMethod"; export type { ForkConversationParams } from "./ForkConversationParams"; export type { ForkConversationResponse } from "./ForkConversationResponse"; +export type { FunctionCallOutputBody } from "./FunctionCallOutputBody"; export type { FunctionCallOutputContentItem } from "./FunctionCallOutputContentItem"; export type { FunctionCallOutputPayload } from "./FunctionCallOutputPayload"; export type { FuzzyFileSearchParams } from "./FuzzyFileSearchParams"; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallOutputContentItem.ts b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallOutputContentItem.ts new file mode 100644 index 000000000..8f432109d --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallOutputContentItem.ts @@ -0,0 +1,5 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+ +export type DynamicToolCallOutputContentItem = { "type": "inputText", text: string, } | { "type": "inputImage", imageUrl: string, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts index a35b9b394..788e6242d 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/DynamicToolCallResponse.ts @@ -1,5 +1,6 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { DynamicToolCallOutputContentItem } from "./DynamicToolCallOutputContentItem"; -export type DynamicToolCallResponse = { output: string, success: boolean, }; +export type DynamicToolCallResponse = { contentItems: Array<DynamicToolCallOutputContentItem>, success: boolean, }; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts index ed4f74d4a..ec5d54cba 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/index.ts @@ -46,6 +46,7 @@ export type { ConfigWriteResponse } from "./ConfigWriteResponse"; export type { ContextCompactedNotification } from "./ContextCompactedNotification"; export type { CreditsSnapshot } from "./CreditsSnapshot"; export type { DeprecationNoticeNotification } from "./DeprecationNoticeNotification"; +export type { DynamicToolCallOutputContentItem } from "./DynamicToolCallOutputContentItem"; export type { DynamicToolCallParams } from "./DynamicToolCallParams"; export type { DynamicToolCallResponse } from "./DynamicToolCallResponse"; export type { DynamicToolSpec } from "./DynamicToolSpec"; diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 8e7beccf7..baafa8467 100644 ---
a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -2826,10 +2826,34 @@ pub struct DynamicToolCallParams { #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] pub struct DynamicToolCallResponse { - pub output: String, + pub content_items: Vec<DynamicToolCallOutputContentItem>, pub success: bool, } +#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] +#[serde(tag = "type", rename_all = "camelCase")] +#[ts(tag = "type")] +#[ts(export_to = "v2/")] +pub enum DynamicToolCallOutputContentItem { + #[serde(rename_all = "camelCase")] + InputText { text: String }, + #[serde(rename_all = "camelCase")] + InputImage { image_url: String }, +} + +impl From<DynamicToolCallOutputContentItem> + for codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem +{ + fn from(item: DynamicToolCallOutputContentItem) -> Self { + match item { + DynamicToolCallOutputContentItem::InputText { text } => Self::InputText { text }, + DynamicToolCallOutputContentItem::InputImage { image_url } => { + Self::InputImage { image_url } + } + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] #[ts(export_to = "v2/")] @@ -3189,4 +3213,61 @@ mod tests { }) ); } + + #[test] + fn dynamic_tool_response_serializes_content_items() { + let value = serde_json::to_value(DynamicToolCallResponse { + content_items: vec![DynamicToolCallOutputContentItem::InputText { + text: "dynamic-ok".to_string(), + }], + success: true, + }) + .unwrap(); + + assert_eq!( + value, + json!({ + "contentItems": [ + { + "type": "inputText", + "text": "dynamic-ok" + } + ], + "success": true, + }) + ); + } + + #[test] + fn dynamic_tool_response_serializes_text_and_image_content_items() { + let value = serde_json::to_value(DynamicToolCallResponse { + content_items: vec![ + DynamicToolCallOutputContentItem::InputText { + text: "dynamic-ok".to_string(), + }, + DynamicToolCallOutputContentItem::InputImage { + image_url:
"data:image/png;base64,AAA".to_string(), + }, + ], + success: true, + }) + .unwrap(); + + assert_eq!( + value, + json!({ + "contentItems": [ + { + "type": "inputText", + "text": "dynamic-ok" + }, + { + "type": "inputImage", + "imageUrl": "data:image/png;base64,AAA" + } + ], + "success": true, + }) + ); + } } diff --git a/codex-rs/app-server/README.md b/codex-rs/app-server/README.md index 69a752471..0fbb6649a 100644 --- a/codex-rs/app-server/README.md +++ b/codex-rs/app-server/README.md @@ -122,6 +122,7 @@ Start a fresh thread when you need a new Codex conversation. "approvalPolicy": "never", "sandbox": "workspaceWrite", "personality": "friendly", + // Experimental: requires opt-in "dynamicTools": [ { "name": "lookup_ticket", @@ -556,6 +557,41 @@ Order of messages: UI guidance for IDEs: surface an approval dialog as soon as the request arrives. The turn will proceed after the server receives a response to the approval request. The terminal `item/completed` notification will be sent with the appropriate status. +### Dynamic tool calls (experimental) + +`dynamicTools` on `thread/start` and the corresponding `item/tool/call` request/response flow are experimental APIs. To enable them, set `initialize.params.capabilities.experimentalApi = true`. + +When a dynamic tool is invoked during a turn, the server sends an `item/tool/call` JSON-RPC request to the client: + +```json +{ + "method": "item/tool/call", + "id": 60, + "params": { + "threadId": "thr_123", + "turnId": "turn_123", + "callId": "call_123", + "tool": "lookup_ticket", + "arguments": { "id": "ABC-123" } + } +} +``` + +The client must respond with content items. Use `inputText` for text and `inputImage` for image URLs/data URLs: + +```json +{ + "id": 60, + "result": { + "contentItems": [ + { "type": "inputText", "text": "Ticket ABC-123 is open." }, + { "type": "inputImage", "imageUrl": "data:image/png;base64,AAA" } + ], + "success": true + } +} +``` + ## Skills Invoke a skill by including `$<skill-name>` in the text input.
Add a `skill` input item (recommended) so the backend injects full skill instructions instead of relying on the model to resolve the name. diff --git a/codex-rs/app-server/src/bespoke_event_handling.rs b/codex-rs/app-server/src/bespoke_event_handling.rs index 55b185e5a..fd8db9cb6 100644 --- a/codex-rs/app-server/src/bespoke_event_handling.rs +++ b/codex-rs/app-server/src/bespoke_event_handling.rs @@ -88,6 +88,7 @@ use codex_core::protocol::TurnDiffEvent; use codex_core::review_format::format_review_findings_block; use codex_core::review_prompts; use codex_protocol::ThreadId; +use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem as CoreDynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolResponse as CoreDynamicToolResponse; use codex_protocol::plan_tool::UpdatePlanArgs; use codex_protocol::protocol::ReviewOutputEvent; @@ -351,8 +352,9 @@ pub(crate) async fn apply_bespoke_event_handling( .submit(Op::DynamicToolResponse { id: call_id.clone(), response: CoreDynamicToolResponse { - call_id, - output: "dynamic tool calls require api v2".to_string(), + content_items: vec![CoreDynamicToolCallOutputContentItem::InputText { + text: "dynamic tool calls require api v2".to_string(), + }], success: false, }, }) diff --git a/codex-rs/app-server/src/dynamic_tools.rs b/codex-rs/app-server/src/dynamic_tools.rs index a1b424d0e..ed284452b 100644 --- a/codex-rs/app-server/src/dynamic_tools.rs +++ b/codex-rs/app-server/src/dynamic_tools.rs @@ -1,5 +1,6 @@ use codex_app_server_protocol::DynamicToolCallResponse; use codex_core::CodexThread; +use codex_protocol::dynamic_tools::DynamicToolCallOutputContentItem as CoreDynamicToolCallOutputContentItem; use codex_protocol::dynamic_tools::DynamicToolResponse as CoreDynamicToolResponse; use codex_protocol::protocol::Op; use std::sync::Arc; @@ -17,8 +18,9 @@ pub(crate) async fn on_call_response( Err(err) => { error!("request failed: {err:?}"); let fallback = CoreDynamicToolResponse { - call_id: 
call_id.clone(), - output: "dynamic tool request failed".to_string(), + content_items: vec![CoreDynamicToolCallOutputContentItem::InputText { + text: "dynamic tool request failed".to_string(), + }], success: false, }; if let Err(err) = conversation @@ -37,14 +39,25 @@ pub(crate) async fn on_call_response( let response = serde_json::from_value::(value).unwrap_or_else(|err| { error!("failed to deserialize DynamicToolCallResponse: {err}"); DynamicToolCallResponse { - output: "dynamic tool response was invalid".to_string(), + content_items: vec![ + codex_app_server_protocol::DynamicToolCallOutputContentItem::InputText { + text: "dynamic tool response was invalid".to_string(), + }, + ], success: false, } }); + + let DynamicToolCallResponse { + content_items, + success, + } = response; let response = CoreDynamicToolResponse { - call_id: call_id.clone(), - output: response.output, - success: response.success, + content_items: content_items + .into_iter() + .map(CoreDynamicToolCallOutputContentItem::from) + .collect(), + success, }; if let Err(err) = conversation .submit(Op::DynamicToolResponse { diff --git a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs index dc985ac49..f4cd85a61 100644 --- a/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs +++ b/codex-rs/app-server/tests/suite/v2/dynamic_tools.rs @@ -4,6 +4,7 @@ use app_test_support::McpProcess; use app_test_support::create_final_assistant_message_sse_response; use app_test_support::create_mock_responses_server_sequence_unchecked; use app_test_support::to_response; +use codex_app_server_protocol::DynamicToolCallOutputContentItem; use codex_app_server_protocol::DynamicToolCallParams; use codex_app_server_protocol::DynamicToolCallResponse; use codex_app_server_protocol::DynamicToolSpec; @@ -15,6 +16,9 @@ use codex_app_server_protocol::ThreadStartResponse; use codex_app_server_protocol::TurnStartParams; use codex_app_server_protocol::TurnStartResponse; use 
codex_app_server_protocol::UserInput as V2UserInput; +use codex_protocol::models::FunctionCallOutputBody; +use codex_protocol::models::FunctionCallOutputContentItem; +use codex_protocol::models::FunctionCallOutputPayload; use core_test_support::responses; use pretty_assertions::assert_eq; use serde_json::Value; @@ -111,7 +115,7 @@ async fn thread_start_injects_dynamic_tools_into_model_requests() -> Result<()> /// Exercises the full dynamic tool call path (server request, client response, model output). #[tokio::test] -async fn dynamic_tool_call_round_trip_sends_output_to_model() -> Result<()> { +async fn dynamic_tool_call_round_trip_sends_text_content_items_to_model() -> Result<()> { let call_id = "dyn-call-1"; let tool_name = "demo_tool"; let tool_args = json!({ "city": "Paris" }); @@ -200,7 +204,9 @@ async fn dynamic_tool_call_round_trip_sends_output_to_model() -> Result<()> { // Respond to the tool call so the model receives a function_call_output. let response = DynamicToolCallResponse { - output: "dynamic-ok".to_string(), + content_items: vec![DynamicToolCallOutputContentItem::InputText { + text: "dynamic-ok".to_string(), + }], success: true, }; mcp.send_response(request_id, serde_json::to_value(response)?) @@ -213,11 +219,171 @@ async fn dynamic_tool_call_round_trip_sends_output_to_model() -> Result<()> { .await??; let bodies = responses_bodies(&server).await?; - let output = bodies + let payload = bodies .iter() - .find_map(|body| function_call_output_text(body, call_id)) + .find_map(|body| function_call_output_payload(body, call_id)) .context("expected function_call_output in follow-up request")?; - assert_eq!(output, "dynamic-ok"); + let expected_payload = FunctionCallOutputPayload::from_content_items(vec![ + FunctionCallOutputContentItem::InputText { + text: "dynamic-ok".to_string(), + }, + ]); + assert_eq!(payload, expected_payload); + + Ok(()) +} + +/// Ensures dynamic tool call responses can include structured content items. 
+#[tokio::test] +async fn dynamic_tool_call_round_trip_sends_content_items_to_model() -> Result<()> { + let call_id = "dyn-call-items-1"; + let tool_name = "demo_tool"; + let tool_args = json!({ "city": "Paris" }); + let tool_call_arguments = serde_json::to_string(&tool_args)?; + + let responses = vec![ + responses::sse(vec![ + responses::ev_response_created("resp-1"), + responses::ev_function_call(call_id, tool_name, &tool_call_arguments), + responses::ev_completed("resp-1"), + ]), + create_final_assistant_message_sse_response("Done")?, + ]; + let server = create_mock_responses_server_sequence_unchecked(responses).await; + + let codex_home = TempDir::new()?; + create_config_toml(codex_home.path(), &server.uri())?; + + let mut mcp = McpProcess::new(codex_home.path()).await?; + timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??; + + let dynamic_tool = DynamicToolSpec { + name: tool_name.to_string(), + description: "Demo dynamic tool".to_string(), + input_schema: json!({ + "type": "object", + "properties": { + "city": { "type": "string" } + }, + "required": ["city"], + "additionalProperties": false, + }), + }; + + let thread_req = mcp + .send_thread_start_request(ThreadStartParams { + dynamic_tools: Some(vec![dynamic_tool]), + ..Default::default() + }) + .await?; + let thread_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(thread_req)), + ) + .await??; + let ThreadStartResponse { thread, .. 
} = to_response::(thread_resp)?; + + let turn_req = mcp + .send_turn_start_request(TurnStartParams { + thread_id: thread.id.clone(), + input: vec![V2UserInput::Text { + text: "Run the tool".to_string(), + text_elements: Vec::new(), + }], + ..Default::default() + }) + .await?; + let turn_resp: JSONRPCResponse = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_response_message(RequestId::Integer(turn_req)), + ) + .await??; + let TurnStartResponse { turn } = to_response::(turn_resp)?; + + let request = timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_request_message(), + ) + .await??; + let (request_id, params) = match request { + ServerRequest::DynamicToolCall { request_id, params } => (request_id, params), + other => panic!("expected DynamicToolCall request, got {other:?}"), + }; + + let expected = DynamicToolCallParams { + thread_id: thread.id, + turn_id: turn.id, + call_id: call_id.to_string(), + tool: tool_name.to_string(), + arguments: tool_args, + }; + assert_eq!(params, expected); + + let response_content_items = vec![ + DynamicToolCallOutputContentItem::InputText { + text: "dynamic-ok".to_string(), + }, + DynamicToolCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + ]; + let content_items = response_content_items + .clone() + .into_iter() + .map(|item| match item { + DynamicToolCallOutputContentItem::InputText { text } => { + FunctionCallOutputContentItem::InputText { text } + } + DynamicToolCallOutputContentItem::InputImage { image_url } => { + FunctionCallOutputContentItem::InputImage { image_url } + } + }) + .collect::>(); + let response = DynamicToolCallResponse { + content_items: response_content_items, + success: true, + }; + mcp.send_response(request_id, serde_json::to_value(response)?) 
+ .await?; + + timeout( + DEFAULT_READ_TIMEOUT, + mcp.read_stream_until_notification_message("turn/completed"), + ) + .await??; + + let bodies = responses_bodies(&server).await?; + let output_value = bodies + .iter() + .find_map(|body| function_call_output_raw_output(body, call_id)) + .context("expected function_call_output output in follow-up request")?; + assert_eq!( + output_value, + json!([ + { + "type": "input_text", + "text": "dynamic-ok" + }, + { + "type": "input_image", + "image_url": "data:image/png;base64,AAA" + } + ]) + ); + + let payload = bodies + .iter() + .find_map(|body| function_call_output_payload(body, call_id)) + .context("expected function_call_output in follow-up request")?; + assert_eq!( + payload.body, + FunctionCallOutputBody::ContentItems(content_items.clone()) + ); + assert_eq!(payload.success, None); + assert_eq!( + serde_json::to_string(&payload)?, + serde_json::to_string(&content_items)? + ); Ok(()) } @@ -248,7 +414,12 @@ fn find_tool<'a>(body: &'a Value, name: &str) -> Option<&'a Value> { }) } -fn function_call_output_text(body: &Value, call_id: &str) -> Option { +fn function_call_output_payload(body: &Value, call_id: &str) -> Option { + function_call_output_raw_output(body, call_id) + .and_then(|output| serde_json::from_value(output).ok()) +} + +fn function_call_output_raw_output(body: &Value, call_id: &str) -> Option { body.get("input") .and_then(Value::as_array) .and_then(|items| { @@ -258,8 +429,7 @@ fn function_call_output_text(body: &Value, call_id: &str) -> Option { }) }) .and_then(|item| item.get("output")) - .and_then(Value::as_str) - .map(str::to_string) + .cloned() } fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> { diff --git a/codex-rs/core/src/client_common.rs b/codex-rs/core/src/client_common.rs index 2614ce83e..5a4eea883 100644 --- a/codex-rs/core/src/client_common.rs +++ b/codex-rs/core/src/client_common.rs @@ -3,6 +3,7 @@ use crate::config::types::Personality; use 
crate::error::Result; pub use codex_api::common::ResponseEvent; use codex_protocol::models::BaseInstructions; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::ResponseItem; use futures::Stream; use serde::Deserialize; @@ -97,9 +98,11 @@ fn reserialize_shell_outputs(items: &mut [ResponseItem]) { } ResponseItem::FunctionCallOutput { call_id, output } => { if shell_call_ids.remove(call_id) - && let Some(structured) = parse_structured_shell_output(&output.content) + && let Some(structured) = output + .text_content() + .and_then(parse_structured_shell_output) { - output.content = structured + output.body = FunctionCallOutputBody::Text(structured); } } _ => {} diff --git a/codex-rs/core/src/codex.rs b/codex-rs/core/src/codex.rs index 4068345c3..c522e35b7 100644 --- a/codex-rs/core/src/codex.rs +++ b/codex-rs/core/src/codex.rs @@ -4709,6 +4709,7 @@ mod tests { use crate::tools::format_exec_output_str; use codex_protocol::ThreadId; + use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use crate::protocol::CompactedItem; @@ -5342,13 +5343,14 @@ mod tests { let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&json!({ - "ok": true, - "value": 42 - })) - .unwrap(), + body: FunctionCallOutputBody::Text( + serde_json::to_string(&json!({ + "ok": true, + "value": 42 + })) + .unwrap(), + ), success: Some(true), - ..Default::default() }; assert_eq!(expected, got); @@ -5385,10 +5387,10 @@ mod tests { let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&vec![text_block("hello"), text_block("world")]) - .unwrap(), + body: FunctionCallOutputBody::Text( + serde_json::to_string(&vec![text_block("hello"), text_block("world")]).unwrap(), + ), success: Some(true), - ..Default::default() }; assert_eq!(expected, got); @@ -5405,9 +5407,10 @@ mod tests { let got = 
FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&json!({ "message": "bad" })).unwrap(), + body: FunctionCallOutputBody::Text( + serde_json::to_string(&json!({ "message": "bad" })).unwrap(), + ), success: Some(false), - ..Default::default() }; assert_eq!(expected, got); @@ -5424,9 +5427,10 @@ mod tests { let got = FunctionCallOutputPayload::from(&ctr); let expected = FunctionCallOutputPayload { - content: serde_json::to_string(&vec![text_block("alpha")]).unwrap(), + body: FunctionCallOutputBody::Text( + serde_json::to_string(&vec![text_block("alpha")]).unwrap(), + ), success: Some(true), - ..Default::default() }; assert_eq!(expected, got); @@ -6271,7 +6275,10 @@ mod tests { .await; let output = match resp2.expect("expected Ok result") { - ToolOutput::Function { content, .. } => content, + ToolOutput::Function { + body: FunctionCallOutputBody::Text(content), + .. + } => content, _ => panic!("unexpected tool output"), }; diff --git a/codex-rs/core/src/context_manager/history.rs b/codex-rs/core/src/context_manager/history.rs index a68c4daaf..b1bf4086d 100644 --- a/codex-rs/core/src/context_manager/history.rs +++ b/codex-rs/core/src/context_manager/history.rs @@ -10,6 +10,7 @@ use crate::truncate::truncate_function_output_items_with_policy; use crate::truncate::truncate_text; use crate::user_shell_command::is_user_shell_command_text; use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; @@ -134,7 +135,7 @@ impl ContextManager { match &mut self.items[index] { ResponseItem::FunctionCallOutput { output, .. 
} => { - let Some(content_items) = output.content_items.as_mut() else { + let Some(content_items) = output.content_items_mut() else { return false; }; let mut replaced = false; @@ -268,19 +269,23 @@ impl ContextManager { let policy_with_serialization_budget = policy * 1.2; match item { ResponseItem::FunctionCallOutput { call_id, output } => { - let truncated = - truncate_text(output.content.as_str(), policy_with_serialization_budget); - let truncated_items = output.content_items.as_ref().map(|items| { - truncate_function_output_items_with_policy( - items, - policy_with_serialization_budget, - ) - }); + let body = match &output.body { + FunctionCallOutputBody::Text(content) => FunctionCallOutputBody::Text( + truncate_text(content, policy_with_serialization_budget), + ), + FunctionCallOutputBody::ContentItems(items) => { + FunctionCallOutputBody::ContentItems( + truncate_function_output_items_with_policy( + items, + policy_with_serialization_budget, + ), + ) + } + }; ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: truncated, - content_items: truncated_items, + body, success: output.success, }, } diff --git a/codex-rs/core/src/context_manager/history_tests.rs b/codex-rs/core/src/context_manager/history_tests.rs index a6eba62f1..c1801e202 100644 --- a/codex-rs/core/src/context_manager/history_tests.rs +++ b/codex-rs/core/src/context_manager/history_tests.rs @@ -3,6 +3,7 @@ use crate::truncate; use crate::truncate::TruncationPolicy; use codex_git::GhostCommit; use codex_protocol::models::ContentItem; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::LocalShellAction; @@ -63,10 +64,7 @@ fn user_input_text_msg(text: &str) -> ResponseItem { fn function_call_output(call_id: &str, content: &str) -> ResponseItem { ResponseItem::FunctionCallOutput { call_id: 
call_id.to_string(), - output: FunctionCallOutputPayload { - content: content.to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text(content.to_string()), } } @@ -263,10 +261,7 @@ fn remove_first_item_removes_matching_output_for_function_call() { }, ResponseItem::FunctionCallOutput { call_id: "call-1".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ]; let mut h = create_history_with_items(items); @@ -279,10 +274,7 @@ fn remove_first_item_removes_matching_call_for_output() { let items = vec![ ResponseItem::FunctionCallOutput { call_id: "call-2".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ResponseItem::FunctionCall { id: None, @@ -308,10 +300,7 @@ fn remove_last_item_removes_matching_call_for_output() { }, ResponseItem::FunctionCallOutput { call_id: "call-delete-last".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ]; let mut h = create_history_with_items(items); @@ -327,10 +316,11 @@ fn replace_last_turn_images_replaces_tool_output_images() { ResponseItem::FunctionCallOutput { call_id: "call-1".to_string(), output: FunctionCallOutputPayload { - content: "ok".to_string(), - content_items: Some(vec![FunctionCallOutputContentItem::InputImage { - image_url: "data:image/png;base64,AAA".to_string(), - }]), + body: FunctionCallOutputBody::ContentItems(vec![ + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + ]), success: Some(true), }, }, @@ -346,10 +336,11 @@ fn replace_last_turn_images_replaces_tool_output_images() { ResponseItem::FunctionCallOutput { call_id: "call-1".to_string(), output: 
FunctionCallOutputPayload { - content: "ok".to_string(), - content_items: Some(vec![FunctionCallOutputContentItem::InputText { - text: "Invalid image".to_string(), - }]), + body: FunctionCallOutputBody::ContentItems(vec![ + FunctionCallOutputContentItem::InputText { + text: "Invalid image".to_string(), + }, + ]), success: Some(true), }, }, @@ -391,10 +382,7 @@ fn remove_first_item_handles_local_shell_pair() { }, ResponseItem::FunctionCallOutput { call_id: "call-3".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ]; let mut h = create_history_with_items(items); @@ -560,10 +548,7 @@ fn normalization_retains_local_shell_outputs() { }, ResponseItem::FunctionCallOutput { call_id: "shell-1".to_string(), - output: FunctionCallOutputPayload { - content: "Total output lines: 1\n\nok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("Total output lines: 1\n\nok".to_string()), }, ]; @@ -583,9 +568,8 @@ fn record_items_truncates_function_call_output_content() { let item = ResponseItem::FunctionCallOutput { call_id: "call-100".to_string(), output: FunctionCallOutputPayload { - content: long_output.clone(), + body: FunctionCallOutputBody::Text(long_output.clone()), success: Some(true), - ..Default::default() }, }; @@ -594,16 +578,15 @@ fn record_items_truncates_function_call_output_content() { assert_eq!(history.items.len(), 1); match &history.items[0] { ResponseItem::FunctionCallOutput { output, .. 
} => { - assert_ne!(output.content, long_output); + let content = output.text_content().unwrap_or_default(); + assert_ne!(content, long_output); assert!( - output.content.contains("tokens truncated"), - "expected token-based truncation marker, got {}", - output.content + content.contains("tokens truncated"), + "expected token-based truncation marker, got {content}" ); assert!( - output.content.contains("tokens truncated"), - "expected truncation marker, got {}", - output.content + content.contains("tokens truncated"), + "expected truncation marker, got {content}" ); } other => panic!("unexpected history item: {other:?}"), @@ -648,9 +631,8 @@ fn record_items_respects_custom_token_limit() { let item = ResponseItem::FunctionCallOutput { call_id: "call-custom-limit".to_string(), output: FunctionCallOutputPayload { - content: long_output, + body: FunctionCallOutputBody::Text(long_output), success: Some(true), - ..Default::default() }, }; @@ -660,7 +642,11 @@ fn record_items_respects_custom_token_limit() { ResponseItem::FunctionCallOutput { output, .. 
} => output, other => panic!("unexpected history item: {other:?}"), }; - assert!(stored.content.contains("tokens truncated")); + assert!( + stored + .text_content() + .is_some_and(|content| content.contains("tokens truncated")) + ); } fn assert_truncated_message_matches(message: &str, line: &str, expected_removed: usize) { @@ -782,10 +768,7 @@ fn normalize_adds_missing_output_for_function_call() { }, ResponseItem::FunctionCallOutput { call_id: "call-x".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -859,10 +842,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id() { }, ResponseItem::FunctionCallOutput { call_id: "shell-1".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -873,10 +853,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id() { fn normalize_removes_orphan_function_call_output() { let items = vec![ResponseItem::FunctionCallOutput { call_id: "orphan-1".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }]; let mut h = create_history_with_items(items); @@ -913,10 +890,7 @@ fn normalize_mixed_inserts_and_removals() { // Orphan output that should be removed ResponseItem::FunctionCallOutput { call_id: "c2".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, // Will get an inserted custom tool output ResponseItem::CustomToolCall { @@ -955,10 +929,7 @@ fn normalize_mixed_inserts_and_removals() { }, ResponseItem::FunctionCallOutput { call_id: "c1".to_string(), - output: FunctionCallOutputPayload { - 
content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ResponseItem::CustomToolCall { id: None, @@ -985,10 +956,7 @@ fn normalize_mixed_inserts_and_removals() { }, ResponseItem::FunctionCallOutput { call_id: "s1".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -1015,10 +983,7 @@ fn normalize_adds_missing_output_for_function_call_inserts_output() { }, ResponseItem::FunctionCallOutput { call_id: "call-x".to_string(), - output: FunctionCallOutputPayload { - content: "aborted".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("aborted".to_string()), }, ] ); @@ -1065,10 +1030,7 @@ fn normalize_adds_missing_output_for_local_shell_call_with_id_panics_in_debug() fn normalize_removes_orphan_function_call_output_panics_in_debug() { let items = vec![ResponseItem::FunctionCallOutput { call_id: "orphan-1".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }]; let mut h = create_history_with_items(items); h.normalize_history(); @@ -1099,10 +1061,7 @@ fn normalize_mixed_inserts_and_removals_panics_in_debug() { }, ResponseItem::FunctionCallOutput { call_id: "c2".to_string(), - output: FunctionCallOutputPayload { - content: "ok".to_string(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".to_string()), }, ResponseItem::CustomToolCall { id: None, diff --git a/codex-rs/core/src/context_manager/normalize.rs b/codex-rs/core/src/context_manager/normalize.rs index 85e25e32a..37e177900 100644 --- a/codex-rs/core/src/context_manager/normalize.rs +++ b/codex-rs/core/src/context_manager/normalize.rs @@ -1,5 +1,6 @@ use std::collections::HashSet; +use 
codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseItem; @@ -29,7 +30,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec) { ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: "aborted".to_string(), + body: FunctionCallOutputBody::Text("aborted".to_string()), ..Default::default() }, }, @@ -76,7 +77,7 @@ pub(crate) fn ensure_call_outputs_present(items: &mut Vec) { ResponseItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: "aborted".to_string(), + body: FunctionCallOutputBody::Text("aborted".to_string()), ..Default::default() }, }, diff --git a/codex-rs/core/src/mcp_tool_call.rs b/codex-rs/core/src/mcp_tool_call.rs index 0c9a4a003..737b13024 100644 --- a/codex-rs/core/src/mcp_tool_call.rs +++ b/codex-rs/core/src/mcp_tool_call.rs @@ -11,6 +11,7 @@ use crate::protocol::McpInvocation; use crate::protocol::McpToolCallBeginEvent; use crate::protocol::McpToolCallEndEvent; use codex_protocol::mcp::CallToolResult; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; use codex_protocol::protocol::AskForApproval; @@ -46,9 +47,8 @@ pub(crate) async fn handle_mcp_tool_call( return ResponseInputItem::FunctionCallOutput { call_id: call_id.clone(), output: FunctionCallOutputPayload { - content: format!("err: {e}"), + body: FunctionCallOutputBody::Text(format!("err: {e}")), success: Some(false), - ..Default::default() }, }; } diff --git a/codex-rs/core/src/stream_events_utils.rs b/codex-rs/core/src/stream_events_utils.rs index 31b4f1837..394f4b932 100644 --- a/codex-rs/core/src/stream_events_utils.rs +++ b/codex-rs/core/src/stream_events_utils.rs @@ -14,6 +14,7 @@ use crate::parse_turn_item; use crate::proposed_plan_parser::strip_proposed_plan_blocks; use 
crate::tools::parallel::ToolCallRuntime; use crate::tools::router::ToolRouter; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; @@ -107,7 +108,7 @@ pub(crate) async fn handle_output_item_done( let response = ResponseInputItem::FunctionCallOutput { call_id: String::new(), output: FunctionCallOutputPayload { - content: msg.to_string(), + body: FunctionCallOutputBody::Text(msg.to_string()), ..Default::default() }, }; @@ -130,7 +131,7 @@ pub(crate) async fn handle_output_item_done( let response = ResponseInputItem::FunctionCallOutput { call_id: String::new(), output: FunctionCallOutputPayload { - content: message, + body: FunctionCallOutputBody::Text(message), ..Default::default() }, }; @@ -235,9 +236,8 @@ pub(crate) fn response_input_to_response_item(input: &ResponseInputItem) -> Opti let output = match result { Ok(call_tool_result) => FunctionCallOutputPayload::from(call_tool_result), Err(err) => FunctionCallOutputPayload { - content: err.clone(), + body: FunctionCallOutputBody::Text(err.clone()), success: Some(false), - ..Default::default() }, }; Some(ResponseItem::FunctionCallOutput { diff --git a/codex-rs/core/src/tools/context.rs b/codex-rs/core/src/tools/context.rs index f0bbb158f..e9edd7db4 100644 --- a/codex-rs/core/src/tools/context.rs +++ b/codex-rs/core/src/tools/context.rs @@ -5,7 +5,7 @@ use crate::tools::TELEMETRY_PREVIEW_MAX_LINES; use crate::tools::TELEMETRY_PREVIEW_TRUNCATION_NOTICE; use crate::turn_diff_tracker::TurnDiffTracker; use codex_protocol::mcp::CallToolResult; -use codex_protocol::models::FunctionCallOutputContentItem; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ShellToolCallParams; @@ -58,10 +58,9 @@ impl ToolPayload { #[derive(Clone)] pub enum ToolOutput { 
Function { - // Plain text representation of the tool output. - content: String, - // Some tool calls such as MCP calls may return structured content that can get parsed into an array of polymorphic content items. - content_items: Option>, + // Canonical output body for function-style tools. This may be plain text + // or structured content items. + body: FunctionCallOutputBody, success: Option, }, Mcp { @@ -72,7 +71,9 @@ pub enum ToolOutput { impl ToolOutput { pub fn log_preview(&self) -> String { match self { - ToolOutput::Function { content, .. } => telemetry_preview(content), + ToolOutput::Function { body, .. } => { + telemetry_preview(&body.to_text().unwrap_or_default()) + } ToolOutput::Mcp { result } => format!("{result:?}"), } } @@ -86,27 +87,28 @@ impl ToolOutput { pub fn into_response(self, call_id: &str, payload: &ToolPayload) -> ResponseInputItem { match self { - ToolOutput::Function { - content, - content_items, - success, - } => { + ToolOutput::Function { body, success } => { + // `custom_tool_call` is the Responses API item type for freeform + // tools (`ToolSpec::Freeform`, e.g. freeform `apply_patch`). + // Those payloads must round-trip as `custom_tool_call_output` + // with plain string output. if matches!(payload, ToolPayload::Custom { .. }) { - ResponseInputItem::CustomToolCallOutput { + // Freeform/custom tools (`custom_tool_call`) use the custom + // output wire shape and remain string-only. + return ResponseInputItem::CustomToolCallOutput { call_id: call_id.to_string(), - output: content, - } - } else { - ResponseInputItem::FunctionCallOutput { - call_id: call_id.to_string(), - output: FunctionCallOutputPayload { - content, - content_items, - success, - }, - } + output: body.to_text().unwrap_or_default(), + }; + } + + // Function-style outputs (JSON function tools, including dynamic + // tools and MCP adaptation) preserve the exact body shape. 
+ ResponseInputItem::FunctionCallOutput { + call_id: call_id.to_string(), + output: FunctionCallOutputPayload { body, success }, } } + // Direct MCP response path for MCP tool result envelopes. ToolOutput::Mcp { result } => ResponseInputItem::McpToolCallOutput { call_id: call_id.to_string(), result, @@ -158,6 +160,7 @@ fn telemetry_preview(content: &str) -> String { #[cfg(test)] mod tests { use super::*; + use codex_protocol::models::FunctionCallOutputContentItem; use pretty_assertions::assert_eq; #[test] @@ -166,8 +169,7 @@ mod tests { input: "patch".to_string(), }; let response = ToolOutput::Function { - content: "patched".to_string(), - content_items: None, + body: FunctionCallOutputBody::Text("patched".to_string()), success: Some(true), } .into_response("call-42", &payload); @@ -187,8 +189,7 @@ mod tests { arguments: "{}".to_string(), }; let response = ToolOutput::Function { - content: "ok".to_string(), - content_items: None, + body: FunctionCallOutputBody::Text("ok".to_string()), success: Some(true), } .into_response("fn-1", &payload); @@ -196,14 +197,58 @@ mod tests { match response { ResponseInputItem::FunctionCallOutput { call_id, output } => { assert_eq!(call_id, "fn-1"); - assert_eq!(output.content, "ok"); - assert!(output.content_items.is_none()); + assert_eq!(output.text_content(), Some("ok")); + assert!(output.content_items().is_none()); assert_eq!(output.success, Some(true)); } other => panic!("expected FunctionCallOutput, got {other:?}"), } } + #[test] + fn custom_tool_calls_can_derive_text_from_content_items() { + let payload = ToolPayload::Custom { + input: "patch".to_string(), + }; + let response = ToolOutput::Function { + body: FunctionCallOutputBody::ContentItems(vec![ + FunctionCallOutputContentItem::InputText { + text: "line 1".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + FunctionCallOutputContentItem::InputText { + text: "line 2".to_string(), + }, + ]), + success: 
Some(true), + } + .into_response("call-99", &payload); + + match response { + ResponseInputItem::CustomToolCallOutput { call_id, output } => { + assert_eq!(call_id, "call-99"); + assert_eq!(output, "line 1\nline 2"); + } + other => panic!("expected CustomToolCallOutput, got {other:?}"), + } + } + + #[test] + fn log_preview_uses_content_items_when_plain_text_is_missing() { + let output = ToolOutput::Function { + body: FunctionCallOutputBody::ContentItems(vec![ + FunctionCallOutputContentItem::InputText { + text: "preview".to_string(), + }, + ]), + success: Some(true), + }; + + assert_eq!(output.log_preview(), "preview"); + } + #[test] fn telemetry_preview_returns_original_within_limits() { let content = "short output"; diff --git a/codex-rs/core/src/tools/handlers/apply_patch.rs b/codex-rs/core/src/tools/handlers/apply_patch.rs index 46723decf..aced569ce 100644 --- a/codex-rs/core/src/tools/handlers/apply_patch.rs +++ b/codex-rs/core/src/tools/handlers/apply_patch.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::FunctionCallOutputBody; use std::collections::BTreeMap; use std::path::Path; @@ -109,8 +110,7 @@ impl ToolHandler for ApplyPatchHandler { InternalApplyPatchInvocation::Output(item) => { let content = item?; Ok(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), }) } @@ -155,8 +155,7 @@ impl ToolHandler for ApplyPatchHandler { ); let content = emitter.finish(event_ctx, out).await?; Ok(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), }) } @@ -205,8 +204,7 @@ pub(crate) async fn intercept_apply_patch( InternalApplyPatchInvocation::Output(item) => { let content = item?; Ok(Some(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), })) } @@ -242,8 +240,7 @@ pub(crate) async fn intercept_apply_patch( ToolEventCtx::new(session, turn, call_id, 
tracker.as_ref().copied()); let content = emitter.finish(event_ctx, out).await?; Ok(Some(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), })) } diff --git a/codex-rs/core/src/tools/handlers/collab.rs b/codex-rs/core/src/tools/handlers/collab.rs index 7ef19506d..3110204d2 100644 --- a/codex-rs/core/src/tools/handlers/collab.rs +++ b/codex-rs/core/src/tools/handlers/collab.rs @@ -15,6 +15,7 @@ use crate::tools::registry::ToolKind; use async_trait::async_trait; use codex_protocol::ThreadId; use codex_protocol::models::BaseInstructions; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::protocol::CollabAgentInteractionBeginEvent; use codex_protocol::protocol::CollabAgentInteractionEndEvent; use codex_protocol::protocol::CollabAgentSpawnBeginEvent; @@ -184,9 +185,8 @@ mod spawn { })?; Ok(ToolOutput::Function { - content, + body: FunctionCallOutputBody::Text(content), success: Some(true), - content_items: None, }) } } @@ -273,9 +273,8 @@ mod send_input { })?; Ok(ToolOutput::Function { - content, + body: FunctionCallOutputBody::Text(content), success: Some(true), - content_items: None, }) } } @@ -441,9 +440,8 @@ mod wait { })?; Ok(ToolOutput::Function { - content, + body: FunctionCallOutputBody::Text(content), success: None, - content_items: None, }) } @@ -552,9 +550,8 @@ pub mod close_agent { })?; Ok(ToolOutput::Function { - content, + body: FunctionCallOutputBody::Text(content), success: Some(true), - content_items: None, }) } } @@ -970,7 +967,9 @@ mod tests { .await .expect("wait should succeed"); let ToolOutput::Function { - content, success, .. + body: FunctionCallOutputBody::Text(content), + success, + .. } = output else { panic!("expected function output"); @@ -1012,7 +1011,9 @@ mod tests { .await .expect("wait should succeed"); let ToolOutput::Function { - content, success, .. + body: FunctionCallOutputBody::Text(content), + success, + .. 
} = output else { panic!("expected function output"); @@ -1103,7 +1104,9 @@ mod tests { .await .expect("wait should succeed"); let ToolOutput::Function { - content, success, .. + body: FunctionCallOutputBody::Text(content), + success, + .. } = output else { panic!("expected function output"); @@ -1141,7 +1144,9 @@ mod tests { .await .expect("close_agent should succeed"); let ToolOutput::Function { - content, success, .. + body: FunctionCallOutputBody::Text(content), + success, + .. } = output else { panic!("expected function output"); diff --git a/codex-rs/core/src/tools/handlers/dynamic.rs b/codex-rs/core/src/tools/handlers/dynamic.rs index a68c70b98..51dc8db56 100644 --- a/codex-rs/core/src/tools/handlers/dynamic.rs +++ b/codex-rs/core/src/tools/handlers/dynamic.rs @@ -10,6 +10,8 @@ use crate::tools::registry::ToolKind; use async_trait::async_trait; use codex_protocol::dynamic_tools::DynamicToolCallRequest; use codex_protocol::dynamic_tools::DynamicToolResponse; +use codex_protocol::models::FunctionCallOutputBody; +use codex_protocol::models::FunctionCallOutputContentItem; use codex_protocol::protocol::EventMsg; use serde_json::Value; use tokio::sync::oneshot; @@ -55,10 +57,19 @@ impl ToolHandler for DynamicToolHandler { ) })?; + let DynamicToolResponse { + content_items, + success, + } = response; + let body = content_items + .into_iter() + .map(FunctionCallOutputContentItem::from) + .collect::>(); + let body = FunctionCallOutputBody::ContentItems(body); + Ok(ToolOutput::Function { - content: response.output, - content_items: None, - success: Some(response.success), + body, + success: Some(success), }) } } diff --git a/codex-rs/core/src/tools/handlers/grep_files.rs b/codex-rs/core/src/tools/handlers/grep_files.rs index a3e89af6a..9fbc6c17a 100644 --- a/codex-rs/core/src/tools/handlers/grep_files.rs +++ b/codex-rs/core/src/tools/handlers/grep_files.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::FunctionCallOutputBody; use std::path::Path; use 
std::time::Duration; @@ -86,14 +87,12 @@ impl ToolHandler for GrepFilesHandler { if search_results.is_empty() { Ok(ToolOutput::Function { - content: "No matches found.".to_string(), - content_items: None, + body: FunctionCallOutputBody::Text("No matches found.".to_string()), success: Some(false), }) } else { Ok(ToolOutput::Function { - content: search_results.join("\n"), - content_items: None, + body: FunctionCallOutputBody::Text(search_results.join("\n")), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/list_dir.rs b/codex-rs/core/src/tools/handlers/list_dir.rs index a06fca3d1..5535ce0ba 100644 --- a/codex-rs/core/src/tools/handlers/list_dir.rs +++ b/codex-rs/core/src/tools/handlers/list_dir.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::FunctionCallOutputBody; use std::collections::VecDeque; use std::ffi::OsStr; use std::fs::FileType; @@ -102,8 +103,7 @@ impl ToolHandler for ListDirHandler { output.push(format!("Absolute path: {}", path.display())); output.extend(entries); Ok(ToolOutput::Function { - content: output.join("\n"), - content_items: None, + body: FunctionCallOutputBody::Text(output.join("\n")), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/mcp.rs b/codex-rs/core/src/tools/handlers/mcp.rs index 5138b3cd0..8ea46b4ed 100644 --- a/codex-rs/core/src/tools/handlers/mcp.rs +++ b/codex-rs/core/src/tools/handlers/mcp.rs @@ -8,6 +8,7 @@ use crate::tools::context::ToolOutput; use crate::tools::context::ToolPayload; use crate::tools::registry::ToolHandler; use crate::tools::registry::ToolKind; +use codex_protocol::models::ResponseInputItem; pub struct McpHandler; @@ -53,20 +54,11 @@ impl ToolHandler for McpHandler { .await; match response { - codex_protocol::models::ResponseInputItem::McpToolCallOutput { result, .. } => { - Ok(ToolOutput::Mcp { result }) - } - codex_protocol::models::ResponseInputItem::FunctionCallOutput { output, .. 
} => { - let codex_protocol::models::FunctionCallOutputPayload { - content, - content_items, - success, - } = output; - Ok(ToolOutput::Function { - content, - content_items, - success, - }) + ResponseInputItem::McpToolCallOutput { result, .. } => Ok(ToolOutput::Mcp { result }), + ResponseInputItem::FunctionCallOutput { output, .. } => { + let success = output.success; + let body = output.body; + Ok(ToolOutput::Function { body, success }) } _ => Err(FunctionCallError::RespondToModel( "mcp handler received unexpected response variant".to_string(), diff --git a/codex-rs/core/src/tools/handlers/mcp_resource.rs b/codex-rs/core/src/tools/handlers/mcp_resource.rs index df421bd6d..fe541197c 100644 --- a/codex-rs/core/src/tools/handlers/mcp_resource.rs +++ b/codex-rs/core/src/tools/handlers/mcp_resource.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::FunctionCallOutputBody; use std::collections::HashMap; use std::sync::Arc; use std::time::Duration; @@ -296,12 +297,10 @@ async fn handle_list_resources( match payload_result { Ok(payload) => match serialize_function_output(payload) { Ok(output) => { - let ToolOutput::Function { - content, success, .. - } = &output - else { + let ToolOutput::Function { body, success } = &output else { unreachable!("MCP resource handler should return function output"); }; + let content = body.to_text().unwrap_or_default(); let duration = start.elapsed(); emit_tool_call_end( &session, @@ -309,7 +308,7 @@ async fn handle_list_resources( &call_id, invocation, duration, - Ok(call_tool_result_from_content(content, *success)), + Ok(call_tool_result_from_content(&content, *success)), ) .await; Ok(output) @@ -405,12 +404,10 @@ async fn handle_list_resource_templates( match payload_result { Ok(payload) => match serialize_function_output(payload) { Ok(output) => { - let ToolOutput::Function { - content, success, .. 
- } = &output - else { + let ToolOutput::Function { body, success } = &output else { unreachable!("MCP resource handler should return function output"); }; + let content = body.to_text().unwrap_or_default(); let duration = start.elapsed(); emit_tool_call_end( &session, @@ -418,7 +415,7 @@ async fn handle_list_resource_templates( &call_id, invocation, duration, - Ok(call_tool_result_from_content(content, *success)), + Ok(call_tool_result_from_content(&content, *success)), ) .await; Ok(output) @@ -494,12 +491,10 @@ async fn handle_read_resource( match payload_result { Ok(payload) => match serialize_function_output(payload) { Ok(output) => { - let ToolOutput::Function { - content, success, .. - } = &output - else { + let ToolOutput::Function { body, success } = &output else { unreachable!("MCP resource handler should return function output"); }; + let content = body.to_text().unwrap_or_default(); let duration = start.elapsed(); emit_tool_call_end( &session, @@ -507,7 +502,7 @@ async fn handle_read_resource( &call_id, invocation, duration, - Ok(call_tool_result_from_content(content, *success)), + Ok(call_tool_result_from_content(&content, *success)), ) .await; Ok(output) @@ -622,8 +617,7 @@ where })?; Ok(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/plan.rs b/codex-rs/core/src/tools/handlers/plan.rs index 88e0b0dc2..2b43429cc 100644 --- a/codex-rs/core/src/tools/handlers/plan.rs +++ b/codex-rs/core/src/tools/handlers/plan.rs @@ -11,6 +11,7 @@ use crate::tools::registry::ToolKind; use crate::tools::spec::JsonSchema; use async_trait::async_trait; use codex_protocol::config_types::ModeKind; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::plan_tool::UpdatePlanArgs; use codex_protocol::protocol::EventMsg; use std::collections::BTreeMap; @@ -88,8 +89,7 @@ impl ToolHandler for PlanHandler { 
handle_update_plan(session.as_ref(), turn.as_ref(), arguments, call_id).await?; Ok(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/read_file.rs b/codex-rs/core/src/tools/handlers/read_file.rs index eb5ce647c..59c6ced7f 100644 --- a/codex-rs/core/src/tools/handlers/read_file.rs +++ b/codex-rs/core/src/tools/handlers/read_file.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::FunctionCallOutputBody; use std::collections::VecDeque; use std::path::PathBuf; @@ -146,8 +147,7 @@ impl ToolHandler for ReadFileHandler { } }; Ok(ToolOutput::Function { - content: collected.join("\n"), - content_items: None, + body: FunctionCallOutputBody::Text(collected.join("\n")), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/request_user_input.rs b/codex-rs/core/src/tools/handlers/request_user_input.rs index 6d014755b..6e1da5641 100644 --- a/codex-rs/core/src/tools/handlers/request_user_input.rs +++ b/codex-rs/core/src/tools/handlers/request_user_input.rs @@ -1,4 +1,5 @@ use async_trait::async_trait; +use codex_protocol::models::FunctionCallOutputBody; use crate::function_tool::FunctionCallError; use crate::tools::context::ToolInvocation; @@ -120,8 +121,7 @@ impl ToolHandler for RequestUserInputHandler { })?; Ok(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/shell.rs b/codex-rs/core/src/tools/handlers/shell.rs index f62caea55..cd1aa9056 100644 --- a/codex-rs/core/src/tools/handlers/shell.rs +++ b/codex-rs/core/src/tools/handlers/shell.rs @@ -1,5 +1,6 @@ use async_trait::async_trait; use codex_protocol::ThreadId; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::ShellCommandToolCallParams; use codex_protocol::models::ShellToolCallParams; use std::sync::Arc; @@ -329,8 +330,7 @@ impl 
ShellHandler { let event_ctx = ToolEventCtx::new(session.as_ref(), turn.as_ref(), &call_id, None); let content = emitter.finish(event_ctx, out).await?; Ok(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/test_sync.rs b/codex-rs/core/src/tools/handlers/test_sync.rs index 643cb464f..4d8fe1025 100644 --- a/codex-rs/core/src/tools/handlers/test_sync.rs +++ b/codex-rs/core/src/tools/handlers/test_sync.rs @@ -1,3 +1,4 @@ +use codex_protocol::models::FunctionCallOutputBody; use std::collections::HashMap; use std::collections::hash_map::Entry; use std::sync::Arc; @@ -91,8 +92,7 @@ impl ToolHandler for TestSyncHandler { } Ok(ToolOutput::Function { - content: "ok".to_string(), - content_items: None, + body: FunctionCallOutputBody::Text("ok".to_string()), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/unified_exec.rs b/codex-rs/core/src/tools/handlers/unified_exec.rs index 2e331bae5..1eb51dd73 100644 --- a/codex-rs/core/src/tools/handlers/unified_exec.rs +++ b/codex-rs/core/src/tools/handlers/unified_exec.rs @@ -18,6 +18,7 @@ use crate::unified_exec::UnifiedExecProcessManager; use crate::unified_exec::UnifiedExecResponse; use crate::unified_exec::WriteStdinRequest; use async_trait::async_trait; +use codex_protocol::models::FunctionCallOutputBody; use serde::Deserialize; use std::path::PathBuf; use std::sync::Arc; @@ -238,8 +239,7 @@ impl ToolHandler for UnifiedExecHandler { let content = format_response(&response); Ok(ToolOutput::Function { - content, - content_items: None, + body: FunctionCallOutputBody::Text(content), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/handlers/view_image.rs b/codex-rs/core/src/tools/handlers/view_image.rs index 87dd7207b..fe6252218 100644 --- a/codex-rs/core/src/tools/handlers/view_image.rs +++ b/codex-rs/core/src/tools/handlers/view_image.rs @@ -1,4 +1,5 @@ use 
async_trait::async_trait; +use codex_protocol::models::FunctionCallOutputBody; use serde::Deserialize; use tokio::fs; @@ -92,8 +93,7 @@ impl ToolHandler for ViewImageHandler { .await; Ok(ToolOutput::Function { - content: "attached local image path".to_string(), - content_items: None, + body: FunctionCallOutputBody::Text("attached local image path".to_string()), success: Some(true), }) } diff --git a/codex-rs/core/src/tools/parallel.rs b/codex-rs/core/src/tools/parallel.rs index dcd3ae40a..ca08048bd 100644 --- a/codex-rs/core/src/tools/parallel.rs +++ b/codex-rs/core/src/tools/parallel.rs @@ -17,6 +17,7 @@ use crate::tools::context::SharedTurnDiffTracker; use crate::tools::context::ToolPayload; use crate::tools::router::ToolCall; use crate::tools::router::ToolRouter; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::FunctionCallOutputPayload; use codex_protocol::models::ResponseInputItem; @@ -119,7 +120,7 @@ impl ToolCallRuntime { _ => ResponseInputItem::FunctionCallOutput { call_id: call.call_id.clone(), output: FunctionCallOutputPayload { - content: Self::abort_message(call, secs), + body: FunctionCallOutputBody::Text(Self::abort_message(call, secs)), ..Default::default() }, }, diff --git a/codex-rs/core/src/tools/router.rs b/codex-rs/core/src/tools/router.rs index 51328ccc9..1eb6190bc 100644 --- a/codex-rs/core/src/tools/router.rs +++ b/codex-rs/core/src/tools/router.rs @@ -11,6 +11,7 @@ use crate::tools::registry::ToolRegistry; use crate::tools::spec::ToolsConfig; use crate::tools::spec::build_specs; use codex_protocol::dynamic_tools::DynamicToolSpec; +use codex_protocol::models::FunctionCallOutputBody; use codex_protocol::models::LocalShellAction; use codex_protocol::models::ResponseInputItem; use codex_protocol::models::ResponseItem; @@ -181,9 +182,8 @@ impl ToolRouter { ResponseInputItem::FunctionCallOutput { call_id, output: codex_protocol::models::FunctionCallOutputPayload { - content: message, + body: 
FunctionCallOutputBody::Text(message), success: Some(false), - ..Default::default() }, } } diff --git a/codex-rs/core/tests/suite/client.rs b/codex-rs/core/tests/suite/client.rs index 62135127f..b6743a683 100644 --- a/codex-rs/core/tests/suite/client.rs +++ b/codex-rs/core/tests/suite/client.rs @@ -1314,10 +1314,7 @@ async fn azure_responses_request_includes_store_and_reasoning_ids() { }); prompt.input.push(ResponseItem::FunctionCallOutput { call_id: "function-call-id".into(), - output: FunctionCallOutputPayload { - content: "ok".into(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".into()), }); prompt.input.push(ResponseItem::LocalShellCall { id: Some("local-shell-id".into()), diff --git a/codex-rs/protocol/src/dynamic_tools.rs b/codex-rs/protocol/src/dynamic_tools.rs index e55d372d8..8b5405f30 100644 --- a/codex-rs/protocol/src/dynamic_tools.rs +++ b/codex-rs/protocol/src/dynamic_tools.rs @@ -24,7 +24,16 @@ pub struct DynamicToolCallRequest { #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(rename_all = "camelCase")] pub struct DynamicToolResponse { - pub call_id: String, - pub output: String, + pub content_items: Vec, pub success: bool, } + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] +#[serde(tag = "type", rename_all = "camelCase")] +#[ts(tag = "type")] +pub enum DynamicToolCallOutputContentItem { + #[serde(rename_all = "camelCase")] + InputText { text: String }, + #[serde(rename_all = "camelCase")] + InputImage { image_url: String }, +} diff --git a/codex-rs/protocol/src/models.rs b/codex-rs/protocol/src/models.rs index 4231423a6..15114d970 100644 --- a/codex-rs/protocol/src/models.rs +++ b/codex-rs/protocol/src/models.rs @@ -129,11 +129,11 @@ pub enum ResponseItem { arguments: String, call_id: String, }, - // NOTE: The input schema for `function_call_output` objects that clients send to the - // OpenAI /v1/responses endpoint is NOT the same shape as the objects 
the server returns on the - // SSE stream. When *sending* we must wrap the string output inside an object that includes a - // required `success` boolean. To ensure we serialize exactly the expected shape we introduce - // a dedicated payload struct and flatten it here. + // NOTE: The `output` field for `function_call_output` uses a dedicated payload type with + // custom serialization. On the wire it is either: + // - a plain string (`content`) + // - an array of structured content items (`content_items`) + // We keep this behavior centralized in `FunctionCallOutputPayload`. FunctionCallOutput { call_id: String, output: FunctionCallOutputPayload, @@ -617,9 +617,8 @@ impl From for ResponseItem { let output = match result { Ok(result) => FunctionCallOutputPayload::from(&result), Err(tool_call_err) => FunctionCallOutputPayload { - content: format!("err: {tool_call_err:?}"), + body: FunctionCallOutputBody::Text(format!("err: {tool_call_err:?}")), success: Some(false), - ..Default::default() }, }; Self::FunctionCallOutput { call_id, output } @@ -780,39 +779,146 @@ pub enum FunctionCallOutputContentItem { InputImage { image_url: String }, } +/// Converts structured function-call output content into plain text for +/// human-readable surfaces. +/// +/// This conversion is intentionally lossy: +/// - only `input_text` items are included +/// - image items are ignored +/// +/// We use this helper where callers still need a string representation (for +/// example telemetry previews or legacy string-only output paths) while keeping +/// the original multimodal `content_items` as the authoritative payload sent to +/// the model. 
+pub fn function_call_output_content_items_to_text( + content_items: &[FunctionCallOutputContentItem], +) -> Option { + let text_segments = content_items + .iter() + .filter_map(|item| match item { + FunctionCallOutputContentItem::InputText { text } if !text.trim().is_empty() => { + Some(text.as_str()) + } + FunctionCallOutputContentItem::InputText { .. } + | FunctionCallOutputContentItem::InputImage { .. } => None, + }) + .collect::>(); + + if text_segments.is_empty() { + None + } else { + Some(text_segments.join("\n")) + } +} + +impl From + for FunctionCallOutputContentItem +{ + fn from(item: crate::dynamic_tools::DynamicToolCallOutputContentItem) -> Self { + match item { + crate::dynamic_tools::DynamicToolCallOutputContentItem::InputText { text } => { + Self::InputText { text } + } + crate::dynamic_tools::DynamicToolCallOutputContentItem::InputImage { image_url } => { + Self::InputImage { image_url } + } + } + } +} + /// The payload we send back to OpenAI when reporting a tool call result. /// -/// `content` preserves the historical plain-string payload so downstream -/// integrations (tests, logging, etc.) can keep treating tool output as -/// `String`. When an MCP server returns richer data we additionally populate -/// `content_items` with the structured form that the Responses API understands. +/// `body` serializes directly as the wire value for `function_call_output.output`. +/// `success` remains internal metadata for downstream handling. 
#[derive(Debug, Default, Clone, PartialEq, JsonSchema, TS)] pub struct FunctionCallOutputPayload { - pub content: String, - #[serde(skip_serializing_if = "Option::is_none")] - pub content_items: Option>, + pub body: FunctionCallOutputBody, pub success: Option, } -#[derive(Deserialize)] +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)] #[serde(untagged)] -enum FunctionCallOutputPayloadSerde { +pub enum FunctionCallOutputBody { Text(String), - Items(Vec), + ContentItems(Vec), } -// The Responses API expects two *different* shapes depending on success vs failure: -// • success → output is a plain string (no nested object) -// • failure → output is an object { content, success:false } +impl FunctionCallOutputBody { + /// Best-effort conversion of a function-call output body to plain text for + /// human-readable surfaces. + /// + /// This conversion is intentionally lossy when the body contains content + /// items: image entries are dropped and text entries are joined with + /// newlines. 
+ pub fn to_text(&self) -> Option { + match self { + Self::Text(content) => Some(content.clone()), + Self::ContentItems(items) => function_call_output_content_items_to_text(items), + } + } +} + +impl Default for FunctionCallOutputBody { + fn default() -> Self { + Self::Text(String::new()) + } +} + +impl FunctionCallOutputPayload { + pub fn from_text(content: String) -> Self { + Self { + body: FunctionCallOutputBody::Text(content), + success: None, + } + } + + pub fn from_content_items(content_items: Vec) -> Self { + Self { + body: FunctionCallOutputBody::ContentItems(content_items), + success: None, + } + } + + pub fn text_content(&self) -> Option<&str> { + match &self.body { + FunctionCallOutputBody::Text(content) => Some(content), + FunctionCallOutputBody::ContentItems(_) => None, + } + } + + pub fn text_content_mut(&mut self) -> Option<&mut String> { + match &mut self.body { + FunctionCallOutputBody::Text(content) => Some(content), + FunctionCallOutputBody::ContentItems(_) => None, + } + } + + pub fn content_items(&self) -> Option<&[FunctionCallOutputContentItem]> { + match &self.body { + FunctionCallOutputBody::Text(_) => None, + FunctionCallOutputBody::ContentItems(items) => Some(items), + } + } + + pub fn content_items_mut(&mut self) -> Option<&mut Vec> { + match &mut self.body { + FunctionCallOutputBody::Text(_) => None, + FunctionCallOutputBody::ContentItems(items) => Some(items), + } + } +} + +// `function_call_output.output` is encoded as either: +// - an array of structured content items +// - a plain string impl Serialize for FunctionCallOutputPayload { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - if let Some(items) = &self.content_items { - items.serialize(serializer) - } else { - serializer.serialize_str(&self.content) + match &self.body { + FunctionCallOutputBody::Text(content) => serializer.serialize_str(content), + FunctionCallOutputBody::ContentItems(items) => items.serialize(serializer), } } } @@ -822,20 +928,11 @@ 
impl<'de> Deserialize<'de> for FunctionCallOutputPayload { where D: Deserializer<'de>, { - match FunctionCallOutputPayloadSerde::deserialize(deserializer)? { - FunctionCallOutputPayloadSerde::Text(content) => Ok(FunctionCallOutputPayload { - content, - ..Default::default() - }), - FunctionCallOutputPayloadSerde::Items(items) => { - let content = serde_json::to_string(&items).map_err(serde::de::Error::custom)?; - Ok(FunctionCallOutputPayload { - content, - content_items: Some(items), - success: None, - }) - } - } + let body = FunctionCallOutputBody::deserialize(deserializer)?; + Ok(FunctionCallOutputPayload { + body, + success: None, + }) } } @@ -856,16 +953,14 @@ impl From<&CallToolResult> for FunctionCallOutputPayload { match serde_json::to_string(structured_content) { Ok(serialized_structured_content) => { return FunctionCallOutputPayload { - content: serialized_structured_content, + body: FunctionCallOutputBody::Text(serialized_structured_content), success: Some(is_success), - ..Default::default() }; } Err(err) => { return FunctionCallOutputPayload { - content: err.to_string(), + body: FunctionCallOutputBody::Text(err.to_string()), success: Some(false), - ..Default::default() }; } } @@ -875,18 +970,21 @@ impl From<&CallToolResult> for FunctionCallOutputPayload { Ok(serialized_content) => serialized_content, Err(err) => { return FunctionCallOutputPayload { - content: err.to_string(), + body: FunctionCallOutputBody::Text(err.to_string()), success: Some(false), - ..Default::default() }; } }; let content_items = convert_mcp_content_to_items(content); + let body = match content_items { + Some(content_items) => FunctionCallOutputBody::ContentItems(content_items), + None => FunctionCallOutputBody::Text(serialized_content), + }; + FunctionCallOutputPayload { - content: serialized_content, - content_items, + body, success: Some(is_success), } } @@ -937,19 +1035,18 @@ fn convert_mcp_content_to_items( } // Implement Display so callers can treat the payload like a plain 
string when logging or doing -// trivial substring checks in tests (existing tests call `.contains()` on the output). Display -// returns the raw `content` field. +// trivial substring checks in tests (existing tests call `.contains()` on the output). For +// `ContentItems`, Display emits a JSON representation. impl std::fmt::Display for FunctionCallOutputPayload { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str(&self.content) - } -} - -impl std::ops::Deref for FunctionCallOutputPayload { - type Target = str; - fn deref(&self) -> &Self::Target { - &self.content + match &self.body { + FunctionCallOutputBody::Text(content) => f.write_str(content), + FunctionCallOutputBody::ContentItems(items) => { + let content = serde_json::to_string(items).unwrap_or_default(); + f.write_str(content.as_str()) + } + } } } @@ -1010,6 +1107,61 @@ mod tests { assert_eq!(convert_mcp_content_to_items(&contents), None); } + #[test] + fn function_call_output_content_items_to_text_joins_text_segments() { + let content_items = vec![ + FunctionCallOutputContentItem::InputText { + text: "line 1".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + FunctionCallOutputContentItem::InputText { + text: "line 2".to_string(), + }, + ]; + + let text = function_call_output_content_items_to_text(&content_items); + assert_eq!(text, Some("line 1\nline 2".to_string())); + } + + #[test] + fn function_call_output_content_items_to_text_ignores_blank_text_and_images() { + let content_items = vec![ + FunctionCallOutputContentItem::InputText { + text: " ".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + ]; + + let text = function_call_output_content_items_to_text(&content_items); + assert_eq!(text, None); + } + + #[test] + fn function_call_output_body_to_text_returns_plain_text_content() { + let body = 
FunctionCallOutputBody::Text("ok".to_string()); + let text = body.to_text(); + assert_eq!(text, Some("ok".to_string())); + } + + #[test] + fn function_call_output_body_to_text_uses_content_item_fallback() { + let body = FunctionCallOutputBody::ContentItems(vec![ + FunctionCallOutputContentItem::InputText { + text: "line 1".to_string(), + }, + FunctionCallOutputContentItem::InputImage { + image_url: "data:image/png;base64,AAA".to_string(), + }, + ]); + + let text = body.to_text(); + assert_eq!(text, Some("line 1".to_string())); + } + #[test] fn converts_sandbox_mode_into_developer_instructions() { let workspace_write: DeveloperInstructions = SandboxMode::WorkspaceWrite.into(); @@ -1156,10 +1308,7 @@ mod tests { fn serializes_success_as_plain_string() -> Result<()> { let item = ResponseInputItem::FunctionCallOutput { call_id: "call1".into(), - output: FunctionCallOutputPayload { - content: "ok".into(), - ..Default::default() - }, + output: FunctionCallOutputPayload::from_text("ok".into()), }; let json = serde_json::to_string(&item)?; @@ -1175,9 +1324,8 @@ mod tests { let item = ResponseInputItem::FunctionCallOutput { call_id: "call1".into(), output: FunctionCallOutputPayload { - content: "bad".into(), + body: FunctionCallOutputBody::Text("bad".into()), success: Some(false), - ..Default::default() }, }; @@ -1202,7 +1350,10 @@ mod tests { let payload = FunctionCallOutputPayload::from(&call_tool_result); assert_eq!(payload.success, Some(true)); - let items = payload.content_items.clone().expect("content items"); + let Some(items) = payload.content_items() else { + panic!("expected content items"); + }; + let items = items.to_vec(); assert_eq!( items, vec![ @@ -1243,9 +1394,10 @@ mod tests { }; let payload = FunctionCallOutputPayload::from(&call_tool_result); - let Some(items) = payload.content_items else { + let Some(items) = payload.content_items() else { panic!("expected content items"); }; + let items = items.to_vec(); assert_eq!( items, 
vec![FunctionCallOutputContentItem::InputImage { @@ -1274,10 +1426,14 @@ mod tests { image_url: "data:image/png;base64,XYZ".into(), }, ]; - assert_eq!(payload.content_items, Some(expected_items.clone())); - - let expected_content = serde_json::to_string(&expected_items)?; - assert_eq!(payload.content, expected_content); + assert_eq!( + payload.body, + FunctionCallOutputBody::ContentItems(expected_items.clone()) + ); + assert_eq!( + serde_json::to_string(&payload)?, + serde_json::to_string(&expected_items)? + ); Ok(()) }