diff --git a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json index 571079c7c..5a5ddd101 100644 --- a/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json +++ b/codex-rs/app-server-protocol/schema/json/codex_app_server_protocol.schemas.json @@ -11280,6 +11280,25 @@ }, "type": "object" }, + "InputModality": { + "description": "Canonical user-input modality tags advertised by a model.", + "oneOf": [ + { + "description": "Plain text turns and tool payloads.", + "enum": [ + "text" + ], + "type": "string" + }, + { + "description": "Image attachments included in user turns.", + "enum": [ + "image" + ], + "type": "string" + } + ] + }, "ItemCompletedNotification": { "$schema": "http://json-schema.org/draft-07/schema#", "properties": { @@ -11780,6 +11799,16 @@ "id": { "type": "string" }, + "inputModalities": { + "default": [ + "text", + "image" + ], + "items": { + "$ref": "#/definitions/v2/InputModality" + }, + "type": "array" + }, "isDefault": { "type": "boolean" }, diff --git a/codex-rs/app-server-protocol/schema/json/v2/ModelListResponse.json b/codex-rs/app-server-protocol/schema/json/v2/ModelListResponse.json index d41f13a87..8c8126bb6 100644 --- a/codex-rs/app-server-protocol/schema/json/v2/ModelListResponse.json +++ b/codex-rs/app-server-protocol/schema/json/v2/ModelListResponse.json @@ -1,6 +1,25 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "definitions": { + "InputModality": { + "description": "Canonical user-input modality tags advertised by a model.", + "oneOf": [ + { + "description": "Plain text turns and tool payloads.", + "enum": [ + "text" + ], + "type": "string" + }, + { + "description": "Image attachments included in user turns.", + "enum": [ + "image" + ], + "type": "string" + } + ] + }, "Model": { "properties": { "defaultReasoningEffort": { @@ -15,6 +34,16 @@ "id": { "type": "string" }, + "inputModalities": { + "default": [ + "text", + "image" + ], + "items": { + "$ref": "#/definitions/InputModality" + }, + "type": "array" + }, "isDefault": { "type": "boolean" }, diff --git a/codex-rs/app-server-protocol/schema/typescript/InputModality.ts b/codex-rs/app-server-protocol/schema/typescript/InputModality.ts new file mode 100644 index 000000000..73661938b --- /dev/null +++ b/codex-rs/app-server-protocol/schema/typescript/InputModality.ts @@ -0,0 +1,8 @@ +// GENERATED CODE! DO NOT MODIFY BY HAND! + +// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. + +/** + * Canonical user-input modality tags advertised by a model. + */ +export type InputModality = "text" | "image"; diff --git a/codex-rs/app-server-protocol/schema/typescript/index.ts b/codex-rs/app-server-protocol/schema/typescript/index.ts index b24e42a83..91da0708e 100644 --- a/codex-rs/app-server-protocol/schema/typescript/index.ts +++ b/codex-rs/app-server-protocol/schema/typescript/index.ts @@ -90,6 +90,7 @@ export type { InitializeCapabilities } from "./InitializeCapabilities"; export type { InitializeParams } from "./InitializeParams"; export type { InitializeResponse } from "./InitializeResponse"; export type { InputItem } from "./InputItem"; +export type { InputModality } from "./InputModality"; export type { InterruptConversationParams } from "./InterruptConversationParams"; export type { InterruptConversationResponse } from "./InterruptConversationResponse"; export type { ItemCompletedEvent } from "./ItemCompletedEvent"; diff --git a/codex-rs/app-server-protocol/schema/typescript/v2/Model.ts b/codex-rs/app-server-protocol/schema/typescript/v2/Model.ts index b664024d0..72daa93ed 100644 --- a/codex-rs/app-server-protocol/schema/typescript/v2/Model.ts +++ b/codex-rs/app-server-protocol/schema/typescript/v2/Model.ts @@ -1,7 +1,8 @@ // GENERATED CODE! DO NOT MODIFY BY HAND! // This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually. +import type { InputModality } from "../InputModality"; import type { ReasoningEffort } from "../ReasoningEffort"; import type { ReasoningEffortOption } from "./ReasoningEffortOption"; -export type Model = { id: string, model: string, displayName: string, description: string, supportedReasoningEfforts: Array, defaultReasoningEffort: ReasoningEffort, supportsPersonality: boolean, isDefault: boolean, }; +export type Model = { id: string, model: string, displayName: string, description: string, supportedReasoningEfforts: Array, defaultReasoningEffort: ReasoningEffort, inputModalities: Array, supportsPersonality: boolean, isDefault: boolean, }; diff --git a/codex-rs/app-server-protocol/src/protocol/v2.rs b/codex-rs/app-server-protocol/src/protocol/v2.rs index 4f37e2889..19db13723 100644 --- a/codex-rs/app-server-protocol/src/protocol/v2.rs +++ b/codex-rs/app-server-protocol/src/protocol/v2.rs @@ -19,7 +19,9 @@ use codex_protocol::mcp::Resource as McpResource; use codex_protocol::mcp::ResourceTemplate as McpResourceTemplate; use codex_protocol::mcp::Tool as McpTool; use codex_protocol::models::ResponseItem; +use codex_protocol::openai_models::InputModality; use codex_protocol::openai_models::ReasoningEffort; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::parse_command::ParsedCommand as CoreParsedCommand; use codex_protocol::plan_tool::PlanItemArg as CorePlanItemArg; use codex_protocol::plan_tool::StepStatus as CorePlanStepStatus; @@ -992,6 +994,8 @@ pub struct Model { pub description: String, pub supported_reasoning_efforts: Vec, pub default_reasoning_effort: ReasoningEffort, + #[serde(default = "default_input_modalities")] + pub input_modalities: Vec, #[serde(default)] pub supports_personality: bool, // Only one model should be marked as default. diff --git a/codex-rs/app-server/src/models.rs b/codex-rs/app-server/src/models.rs index 4189435c0..133dc73cc 100644 --- a/codex-rs/app-server/src/models.rs +++ b/codex-rs/app-server/src/models.rs @@ -28,6 +28,7 @@ fn model_from_preset(preset: ModelPreset) -> Model { preset.supported_reasoning_efforts, ), default_reasoning_effort: preset.default_reasoning_effort, + input_modalities: preset.input_modalities, supports_personality: preset.supports_personality, is_default: preset.is_default, } diff --git a/codex-rs/app-server/tests/common/models_cache.rs b/codex-rs/app-server/tests/common/models_cache.rs index c3f00cf09..14b4e8d45 100644 --- a/codex-rs/app-server/tests/common/models_cache.rs +++ b/codex-rs/app-server/tests/common/models_cache.rs @@ -6,6 +6,7 @@ use codex_protocol::openai_models::ModelInfo; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelVisibility; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use serde_json::json; use std::path::Path; @@ -38,6 +39,7 @@ fn preset_to_info(preset: &ModelPreset, priority: i32) -> ModelInfo { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/app-server/tests/suite/v2/model_list.rs b/codex-rs/app-server/tests/suite/v2/model_list.rs index a496e4a59..c3b4ec708 100644 --- a/codex-rs/app-server/tests/suite/v2/model_list.rs +++ b/codex-rs/app-server/tests/suite/v2/model_list.rs @@ -12,6 +12,7 @@ use codex_app_server_protocol::ModelListParams; use codex_app_server_protocol::ModelListResponse; use codex_app_server_protocol::ReasoningEffortOption; use codex_app_server_protocol::RequestId; +use codex_protocol::openai_models::InputModality; use codex_protocol::openai_models::ReasoningEffort; use pretty_assertions::assert_eq; use tempfile::TempDir; @@ -72,6 +73,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { }, ], default_reasoning_effort: ReasoningEffort::Medium, + input_modalities: vec![InputModality::Text, InputModality::Image], supports_personality: false, is_default: true, }, @@ -100,6 +102,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { }, ], default_reasoning_effort: ReasoningEffort::Medium, + input_modalities: vec![InputModality::Text, InputModality::Image], supports_personality: false, is_default: false, }, @@ -120,6 +123,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { }, ], default_reasoning_effort: ReasoningEffort::Medium, + input_modalities: vec![InputModality::Text, InputModality::Image], supports_personality: false, is_default: false, }, @@ -154,6 +158,7 @@ async fn list_models_returns_all_models_with_large_limit() -> Result<()> { }, ], default_reasoning_effort: ReasoningEffort::Medium, + input_modalities: vec![InputModality::Text, InputModality::Image], supports_personality: false, is_default: false, }, diff --git a/codex-rs/codex-api/tests/models_integration.rs b/codex-rs/codex-api/tests/models_integration.rs index c75c46a28..2145be654 100644 --- a/codex-rs/codex-api/tests/models_integration.rs +++ b/codex-rs/codex-api/tests/models_integration.rs @@ -11,6 +11,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use http::HeaderMap; use http::Method; use wiremock::Mock; @@ -88,6 +89,7 @@ async fn models_client_hits_models_endpoint() { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }], }; diff --git a/codex-rs/core/src/models_manager/model_info.rs b/codex-rs/core/src/models_manager/model_info.rs index eaab16000..5cccefdd2 100644 --- a/codex-rs/core/src/models_manager/model_info.rs +++ b/codex-rs/core/src/models_manager/model_info.rs @@ -9,6 +9,7 @@ use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationMode; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use crate::config::Config; use crate::features::Feature; @@ -66,6 +67,7 @@ macro_rules! model_info { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; $( diff --git a/codex-rs/core/src/models_manager/model_presets.rs b/codex-rs/core/src/models_manager/model_presets.rs index f9105c644..a597f7f92 100644 --- a/codex-rs/core/src/models_manager/model_presets.rs +++ b/codex-rs/core/src/models_manager/model_presets.rs @@ -3,6 +3,7 @@ use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelUpgrade; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::default_input_modalities; use indoc::indoc; use once_cell::sync::Lazy; @@ -41,6 +42,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: None, show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1-codex-max".to_string(), @@ -71,6 +73,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1-codex-mini".to_string(), @@ -94,6 +97,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.2".to_string(), @@ -124,6 +128,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "bengalfox".to_string(), @@ -154,6 +159,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "boomslang".to_string(), @@ -184,6 +190,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, // Deprecated models. ModelPreset { @@ -211,6 +218,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5-codex-mini".to_string(), @@ -233,6 +241,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1-codex".to_string(), @@ -260,6 +269,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5".to_string(), @@ -290,6 +300,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ModelPreset { id: "gpt-5.1".to_string(), @@ -316,6 +327,7 @@ static PRESETS: Lazy> = Lazy::new(|| { upgrade: Some(gpt_52_codex_upgrade()), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), }, ] }); diff --git a/codex-rs/core/tests/suite/list_models.rs b/codex-rs/core/tests/suite/list_models.rs index f6db54af7..aee3a60e0 100644 --- a/codex-rs/core/tests/suite/list_models.rs +++ b/codex-rs/core/tests/suite/list_models.rs @@ -7,6 +7,7 @@ use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ModelUpgrade; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::default_input_modalities; use core_test_support::load_default_config_for_test; use indoc::indoc; use pretty_assertions::assert_eq; @@ -99,6 +100,7 @@ fn gpt_52_codex() -> ModelPreset { upgrade: None, show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -142,6 +144,7 @@ fn gpt_5_1_codex_max() -> ModelPreset { )), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -177,6 +180,7 @@ fn gpt_5_1_codex_mini() -> ModelPreset { )), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -222,6 +226,7 @@ fn gpt_5_2() -> ModelPreset { )), show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -255,6 +260,7 @@ fn bengalfox() -> ModelPreset { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -288,6 +294,7 @@ fn boomslang() -> ModelPreset { upgrade: None, show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -327,6 +334,7 @@ fn gpt_5_codex() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -362,6 +370,7 @@ fn gpt_5_codex_mini() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -401,6 +410,7 @@ fn gpt_5_1_codex() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -444,6 +454,7 @@ fn gpt_5() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } @@ -483,6 +494,7 @@ fn gpt_5_1() -> ModelPreset { )), show_in_picker: false, supported_in_api: true, + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/core/tests/suite/models_cache_ttl.rs b/codex-rs/core/tests/suite/models_cache_ttl.rs index a6f3c1bdb..49d05b83e 100644 --- a/codex-rs/core/tests/suite/models_cache_ttl.rs +++ b/codex-rs/core/tests/suite/models_cache_ttl.rs @@ -19,6 +19,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::user_input::UserInput; use core_test_support::responses; use core_test_support::responses::ev_assistant_message; @@ -349,5 +350,6 @@ fn test_remote_model(slug: &str, priority: i32) -> ModelInfo { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/core/tests/suite/personality.rs b/codex-rs/core/tests/suite/personality.rs index 85cc25f4c..87978ceb5 100644 --- a/codex-rs/core/tests/suite/personality.rs +++ b/codex-rs/core/tests/suite/personality.rs @@ -16,6 +16,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::user_input::UserInput; use core_test_support::load_default_config_for_test; use core_test_support::responses::ev_completed; @@ -512,6 +513,7 @@ async fn ignores_remote_personality_if_remote_models_disabled() -> anyhow::Resul auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; let _models_mock = mount_models_once( @@ -627,6 +629,7 @@ async fn remote_model_friendly_personality_instructions_with_feature() -> anyhow auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; let _models_mock = mount_models_once( @@ -737,6 +740,7 @@ async fn user_turn_personality_remote_model_template_includes_update_message() - auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: Vec::new(), + input_modalities: default_input_modalities(), }; let _models_mock = mount_models_once( diff --git a/codex-rs/core/tests/suite/remote_models.rs b/codex-rs/core/tests/suite/remote_models.rs index f59f73fd1..ed46855f9 100644 --- a/codex-rs/core/tests/suite/remote_models.rs +++ b/codex-rs/core/tests/suite/remote_models.rs @@ -25,6 +25,7 @@ use codex_protocol::openai_models::ModelsResponse; use codex_protocol::openai_models::ReasoningEffort; use codex_protocol::openai_models::ReasoningEffortPreset; use codex_protocol::openai_models::TruncationPolicyConfig; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::user_input::UserInput; use core_test_support::load_default_config_for_test; use core_test_support::responses::ev_assistant_message; @@ -76,6 +77,7 @@ async fn remote_models_remote_model_uses_unified_exec() -> Result<()> { shell_type: ConfigShellToolType::UnifiedExec, visibility: ModelVisibility::List, supported_in_api: true, + input_modalities: default_input_modalities(), priority: 1, upgrade: None, base_instructions: "base instructions".to_string(), @@ -313,6 +315,7 @@ async fn remote_models_apply_remote_base_instructions() -> Result<()> { shell_type: ConfigShellToolType::ShellCommand, visibility: ModelVisibility::List, supported_in_api: true, + input_modalities: default_input_modalities(), priority: 1, upgrade: None, base_instructions: remote_base.to_string(), @@ -787,6 +790,7 @@ fn test_remote_model_with_policy( shell_type: ConfigShellToolType::ShellCommand, visibility, supported_in_api: true, + input_modalities: default_input_modalities(), priority, upgrade: None, base_instructions: "base instructions".to_string(), diff --git a/codex-rs/protocol/src/openai_models.rs b/codex-rs/protocol/src/openai_models.rs index 9a940539d..90cf34f39 100644 --- a/codex-rs/protocol/src/openai_models.rs +++ b/codex-rs/protocol/src/openai_models.rs @@ -1,3 +1,8 @@ +//! Shared model metadata types exchanged between Codex services and clients. +//! +//! These types are serialized across core, TUI, app-server, and SDK boundaries, so field defaults +//! are used to preserve compatibility when older payloads omit newly introduced attributes. + use std::collections::HashMap; use std::collections::HashSet; @@ -43,6 +48,38 @@ pub enum ReasoningEffort { XHigh, } +/// Canonical user-input modality tags advertised by a model. +#[derive( + Debug, + Serialize, + Deserialize, + Clone, + Copy, + PartialEq, + Eq, + Display, + JsonSchema, + TS, + EnumIter, + Hash, +)] +#[serde(rename_all = "lowercase")] +#[strum(serialize_all = "lowercase")] +pub enum InputModality { + /// Plain text turns and tool payloads. + Text, + /// Image attachments included in user turns. + Image, +} + +/// Backward-compatible default when `input_modalities` is omitted on the wire. +/// +/// Legacy payloads predate modality metadata, so we conservatively assume both text and images are +/// accepted unless a preset explicitly narrows support. +pub fn default_input_modalities() -> Vec { + vec![InputModality::Text, InputModality::Image] +} + /// A reasoning effort option that can be surfaced for a model. #[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema, PartialEq, Eq)] pub struct ReasoningEffortPreset { @@ -88,6 +125,9 @@ pub struct ModelPreset { pub show_in_picker: bool, /// whether this model is supported in the api pub supported_in_api: bool, + /// Input modalities accepted when composing user turns for this preset. + #[serde(default = "default_input_modalities")] + pub input_modalities: Vec, } /// Visibility of a model in the picker or APIs. @@ -206,6 +246,9 @@ pub struct ModelInfo { #[serde(default = "default_effective_context_window_percent")] pub effective_context_window_percent: i64, pub experimental_supported_tools: Vec, + /// Input modalities accepted by the backend for this model. + #[serde(default = "default_input_modalities")] + pub input_modalities: Vec, } impl ModelInfo { @@ -350,6 +393,7 @@ impl From for ModelPreset { }), show_in_picker: info.visibility == ModelVisibility::List, supported_in_api: info.supported_in_api, + input_modalities: info.input_modalities, } } } @@ -460,6 +504,7 @@ mod tests { auto_compact_token_limit: None, effective_context_window_percent: 95, experimental_supported_tools: vec![], + input_modalities: default_input_modalities(), } } diff --git a/codex-rs/tui/src/bottom_pane/chat_composer.rs b/codex-rs/tui/src/bottom_pane/chat_composer.rs index 068674b41..116e9c3ab 100644 --- a/codex-rs/tui/src/bottom_pane/chat_composer.rs +++ b/codex-rs/tui/src/bottom_pane/chat_composer.rs @@ -394,6 +394,14 @@ impl ChatComposer { self.skills = skills; } + /// Toggle composer-side image paste handling. + /// + /// This only affects whether image-like paste content is converted into attachments; the + /// `ChatWidget` layer still performs capability checks before images are submitted. + pub fn set_image_paste_enabled(&mut self, enabled: bool) { + self.config.image_paste_enabled = enabled; + } + pub fn set_connector_mentions(&mut self, connectors_snapshot: Option) { self.connectors_snapshot = connectors_snapshot; } @@ -712,18 +720,45 @@ impl ChatComposer { } /// Replace the entire composer content with `text` and reset cursor. - /// This clears any pending paste payloads. + /// + /// This is the "fresh draft" path: it clears pending paste payloads and + /// mention link targets. Callers restoring a previously submitted draft + /// that must keep `$name -> path` resolution should use + /// [`Self::set_text_content_with_mention_paths`] instead. pub(crate) fn set_text_content( &mut self, text: String, text_elements: Vec, local_image_paths: Vec, + ) { + self.set_text_content_with_mention_paths( + text, + text_elements, + local_image_paths, + HashMap::new(), + ); + } + + /// Replace the entire composer content while restoring mention link targets. + /// + /// Mention popup insertion stores both visible text (for example `$file`) + /// and hidden `mention_paths` used to resolve the canonical target during + /// submission. Use this method when restoring an interrupted or blocked + /// draft; if callers restore only text and images, mentions can appear + /// intact to users while resolving to the wrong target or dropping on + /// retry. + pub(crate) fn set_text_content_with_mention_paths( + &mut self, + text: String, + text_elements: Vec, + local_image_paths: Vec, + mention_paths: HashMap, ) { // Clear any existing content, placeholders, and attachments first. self.textarea.set_text_clearing_elements(""); self.pending_pastes.clear(); self.attached_images.clear(); - self.mention_paths.clear(); + self.mention_paths = mention_paths; self.textarea.set_text_with_elements(&text, &text_elements); diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index 6d3a6755a..cd5e2adb8 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -209,6 +209,14 @@ impl BottomPane { self.request_redraw(); } + /// Update image-paste behavior for the active composer and repaint immediately. + /// + /// Callers use this to keep composer affordances aligned with model capabilities. + pub fn set_image_paste_enabled(&mut self, enabled: bool) { + self.composer.set_image_paste_enabled(enabled); + self.request_redraw(); + } + pub fn set_connectors_snapshot(&mut self, snapshot: Option) { self.composer.set_connector_mentions(snapshot); self.request_redraw(); @@ -402,6 +410,10 @@ impl BottomPane { } /// Replace the composer text with `text`. + /// + /// This is intended for fresh input where mention linkage does not need to + /// survive; it routes to `ChatComposer::set_text_content`, which resets + /// `mention_paths`. pub(crate) fn set_composer_text( &mut self, text: String, @@ -414,6 +426,27 @@ impl BottomPane { self.request_redraw(); } + /// Replace the composer text while preserving mention link targets. + /// + /// Use this when rehydrating a draft after a local validation/gating + /// failure (for example unsupported image submit) so previously selected + /// mention targets remain stable across retry. + pub(crate) fn set_composer_text_with_mention_paths( + &mut self, + text: String, + text_elements: Vec, + local_image_paths: Vec, + mention_paths: HashMap, + ) { + self.composer.set_text_content_with_mention_paths( + text, + text_elements, + local_image_paths, + mention_paths, + ); + self.request_redraw(); + } + #[allow(dead_code)] pub(crate) fn set_composer_input_enabled( &mut self, diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index cc99aef7c..ce4af0d2f 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -208,6 +208,7 @@ use codex_core::ThreadManager; use codex_core::protocol::AskForApproval; use codex_core::protocol::SandboxPolicy; use codex_file_search::FileMatch; +use codex_protocol::openai_models::InputModality; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig; use codex_protocol::plan_tool::UpdatePlanArgs; @@ -2748,7 +2749,18 @@ impl ChatWidget { } } + /// Attach a local image to the composer when the active model supports image inputs. + /// + /// When the model does not advertise image support, we keep the draft unchanged and surface a + /// warning event so users can switch models or remove attachments. pub(crate) fn attach_image(&mut self, path: PathBuf) { + if !self.current_model_supports_images() { + self.add_to_history(history_cell::new_warning_event( + self.image_inputs_not_supported_message(), + )); + self.request_redraw(); + return; + } tracing::info!("attach_image path={path:?}"); self.bottom_pane.attach_image(path); self.request_redraw(); @@ -3225,6 +3237,10 @@ impl ChatWidget { if text.is_empty() && local_images.is_empty() { return; } + if !local_images.is_empty() && !self.current_model_supports_images() { + self.restore_blocked_image_submission(text, text_elements, local_images, mention_paths); + return; + } let mut items: Vec = Vec::new(); @@ -3339,6 +3355,34 @@ impl ChatWidget { self.needs_final_message_separator = false; } + /// Restore the blocked submission draft without losing mention resolution state. + /// + /// The blocked-image path intentionally keeps the draft in the composer so + /// users can remove attachments and retry. We must restore + /// `mention_paths` alongside visible text; restoring only `$name` tokens + /// makes the draft look correct while degrading mention resolution to + /// name-only heuristics on retry. + fn restore_blocked_image_submission( + &mut self, + text: String, + text_elements: Vec, + local_images: Vec, + mention_paths: HashMap, + ) { + // Preserve the user's composed payload so they can retry after changing models. + let local_image_paths = local_images.iter().map(|img| img.path.clone()).collect(); + self.bottom_pane.set_composer_text_with_mention_paths( + text, + text_elements, + local_image_paths, + mention_paths, + ); + self.add_to_history(history_cell::new_warning_event( + self.image_inputs_not_supported_message(), + )); + self.request_redraw(); + } + /// Replay a subset of initial events into the UI to seed the transcript when /// resuming an existing session. This approximates the live event flow and /// is intentionally conservative: only safe-to-replay items are rendered to @@ -5279,6 +5323,36 @@ impl ChatWidget { .unwrap_or(false) } + /// Return whether the effective model currently advertises image-input support. + /// + /// We intentionally default to `true` when model metadata cannot be read so transient catalog + /// failures do not hard-block user input in the UI. + fn current_model_supports_images(&self) -> bool { + let model = self.current_model(); + self.models_manager + .try_list_models(&self.config) + .ok() + .and_then(|models| { + models + .into_iter() + .find(|preset| preset.model == model) + .map(|preset| preset.input_modalities.contains(&InputModality::Image)) + }) + .unwrap_or(true) + } + + fn sync_image_paste_enabled(&mut self) { + let enabled = self.current_model_supports_images(); + self.bottom_pane.set_image_paste_enabled(enabled); + } + + fn image_inputs_not_supported_message(&self) -> String { + format!( + "Model {} does not support image inputs. Remove images or switch models.", + self.current_model() + ) + } + #[allow(dead_code)] // Used in tests pub(crate) fn current_collaboration_mode(&self) -> &CollaborationMode { &self.current_collaboration_mode @@ -5351,6 +5425,8 @@ impl ChatWidget { fn refresh_model_display(&mut self) { let effective = self.effective_collaboration_mode(); self.session_header.set_model(effective.model()); + // Keep composer paste affordances aligned with the currently effective model. + self.sync_image_paste_enabled(); } fn model_display_name(&self) -> &str { diff --git a/codex-rs/tui/src/chatwidget/tests.rs b/codex-rs/tui/src/chatwidget/tests.rs index ea0cd4363..00141a096 100644 --- a/codex-rs/tui/src/chatwidget/tests.rs +++ b/codex-rs/tui/src/chatwidget/tests.rs @@ -70,6 +70,7 @@ use codex_protocol::config_types::Personality; use codex_protocol::config_types::Settings; use codex_protocol::openai_models::ModelPreset; use codex_protocol::openai_models::ReasoningEffortPreset; +use codex_protocol::openai_models::default_input_modalities; use codex_protocol::parse_command::ParsedCommand; use codex_protocol::plan_tool::PlanItemArg; use codex_protocol::plan_tool::StepStatus; @@ -324,6 +325,49 @@ async fn submission_preserves_text_elements_and_local_images() { assert_eq!(stored_images, local_images); } +#[tokio::test] +async fn blocked_image_restore_preserves_mention_paths() { + let (mut chat, mut rx, _op_rx) = make_chatwidget_manual(None).await; + + let placeholder = "[Image #1]"; + let text = format!("{placeholder} check $file"); + let text_elements = vec![TextElement::new( + (0..placeholder.len()).into(), + Some(placeholder.to_string()), + )]; + let local_images = vec![LocalImageAttachment { + placeholder: placeholder.to_string(), + path: PathBuf::from("/tmp/blocked.png"), + }]; + let mention_paths = + HashMap::from([("file".to_string(), "/tmp/skills/file/SKILL.md".to_string())]); + + chat.restore_blocked_image_submission( + text.clone(), + text_elements.clone(), + local_images.clone(), + mention_paths.clone(), + ); + + assert_eq!(chat.bottom_pane.composer_text(), text); + assert_eq!(chat.bottom_pane.composer_text_elements(), text_elements); + assert_eq!( + chat.bottom_pane.composer_local_image_paths(), + vec![local_images[0].path.clone()], + ); + assert_eq!(chat.bottom_pane.take_mention_paths(), mention_paths); + + let cells = drain_insert_history(&mut rx); + let warning = cells + .last() + .map(|lines| lines_to_single_string(lines)) + .expect("expected warning cell"); + assert!( + warning.contains("does not support image inputs"), + "expected image warning, got: {warning:?}" + ); +} + #[tokio::test] async fn interrupted_turn_restores_queued_messages_with_images_and_elements() { let (mut chat, _rx, _op_rx) = make_chatwidget_manual(None).await; @@ -3154,6 +3198,7 @@ async fn model_picker_hides_show_in_picker_false_models_from_cache() { upgrade: None, show_in_picker, supported_in_api: true, + input_modalities: default_input_modalities(), }; chat.open_model_popup_with_presets(vec![ @@ -3392,6 +3437,7 @@ async fn single_reasoning_option_skips_selection() { upgrade: None, show_in_picker: true, supported_in_api: true, + input_modalities: default_input_modalities(), }; chat.open_reasoning_popup(preset); diff --git a/docs/contributing.md b/docs/contributing.md index d410123d2..19b31073e 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -41,6 +41,15 @@ If you are invited by a Codex team member to contribute a PR, here is the recomm 3. **Document behavior.** If your change affects user-facing behavior, update the README, inline help (`codex --help`), or relevant example projects. 4. **Keep commits atomic.** Each commit should compile and the tests should pass. This makes reviews and potential rollbacks easier. +### Model metadata updates + +When a change updates model catalogs or model metadata (`/models` payloads, presets, or fixtures): + +- Set `input_modalities` explicitly for any model that does not support images. +- Keep compatibility defaults in mind: omitted `input_modalities` currently implies text + image support. +- Ensure client surfaces that accept images (for example, TUI paste/attach) consume the same capability signal. +- Add/update tests that cover unsupported-image behavior and warning paths. + ### Opening a pull request (by invitation only) - Fill in the PR template (or include similar information) - **What? Why? How?** diff --git a/docs/tui-chat-composer.md b/docs/tui-chat-composer.md index 6211cbcfc..b927e2db4 100644 --- a/docs/tui-chat-composer.md +++ b/docs/tui-chat-composer.md @@ -84,6 +84,9 @@ Key effects when disabled: `prepare_submission_text`. - When `slash_commands_enabled` is `false`, slash-context paste-burst exceptions are disabled. - When `image_paste_enabled` is `false`, file-path paste image attachment is skipped. +- `ChatWidget` may toggle `image_paste_enabled` at runtime based on the selected model's + `input_modalities`; attach and submit paths also re-check support and emit a warning instead of + dropping the draft. Built-in slash command availability is centralized in `codex-rs/tui/src/bottom_pane/slash_commands.rs` and reused by both the composer and the command @@ -252,6 +255,11 @@ Non-char input must not leak burst state across unrelated actions: inserting, deleting, flushing a burst, applying a paste placeholder, etc. - Shortcut overlay toggling via `?` is gated on `!is_in_paste_burst()` so pastes cannot flip UI modes while streaming. +- Mention popup selection has two payloads: visible `$name` text and hidden + `mention_paths[name] -> canonical target` linkage. The generic + `set_text_content` path intentionally clears linkage for fresh drafts; restore + paths that rehydrate blocked/interrupted submissions must use the + mention-preserving setter so retry keeps the originally selected target. ## Tests that pin behavior