- Add a single builder for developer permissions messaging that accepts SandboxPolicy and approval policy. This builder now drives the developer “permissions” message that’s injected at session start and any time sandbox/approval settings change. - Trim EnvironmentContext to only include cwd, writable roots, and shell; removed sandbox/approval/network duplication and adjusted XML serialization and tests accordingly. Follow-up: adding a config value to replace the developer permissions message for custom sandboxes.
1190 lines
40 KiB
Rust
1190 lines
40 KiB
Rust
use std::collections::HashMap;
|
||
use std::path::Path;
|
||
|
||
use codex_utils_image::load_and_resize_to_fit;
|
||
use mcp_types::CallToolResult;
|
||
use mcp_types::ContentBlock;
|
||
use serde::Deserialize;
|
||
use serde::Deserializer;
|
||
use serde::Serialize;
|
||
use serde::ser::Serializer;
|
||
use ts_rs::TS;
|
||
|
||
use crate::config_types::SandboxMode;
|
||
use crate::protocol::AskForApproval;
|
||
use crate::protocol::NetworkAccess;
|
||
use crate::protocol::SandboxPolicy;
|
||
use crate::protocol::WritableRoot;
|
||
use crate::user_input::UserInput;
|
||
use codex_git::GhostCommit;
|
||
use codex_utils_image::error::ImageProcessingError;
|
||
use schemars::JsonSchema;
|
||
|
||
/// Controls whether a command should use the session sandbox or bypass it.
|
||
#[derive(
|
||
Debug, Clone, Copy, Default, Eq, Hash, PartialEq, Serialize, Deserialize, JsonSchema, TS,
|
||
)]
|
||
#[serde(rename_all = "snake_case")]
|
||
pub enum SandboxPermissions {
|
||
/// Run with the configured sandbox
|
||
#[default]
|
||
UseDefault,
|
||
/// Request to run outside the sandbox
|
||
RequireEscalated,
|
||
}
|
||
|
||
impl SandboxPermissions {
|
||
pub fn requires_escalated_permissions(self) -> bool {
|
||
matches!(self, SandboxPermissions::RequireEscalated)
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum ResponseInputItem {
|
||
Message {
|
||
role: String,
|
||
content: Vec<ContentItem>,
|
||
},
|
||
FunctionCallOutput {
|
||
call_id: String,
|
||
output: FunctionCallOutputPayload,
|
||
},
|
||
McpToolCallOutput {
|
||
call_id: String,
|
||
result: Result<CallToolResult, String>,
|
||
},
|
||
CustomToolCallOutput {
|
||
call_id: String,
|
||
output: String,
|
||
},
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum ContentItem {
|
||
InputText { text: String },
|
||
InputImage { image_url: String },
|
||
OutputText { text: String },
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum ResponseItem {
|
||
Message {
|
||
#[serde(default, skip_serializing)]
|
||
#[ts(skip)]
|
||
id: Option<String>,
|
||
role: String,
|
||
content: Vec<ContentItem>,
|
||
},
|
||
Reasoning {
|
||
#[serde(default, skip_serializing)]
|
||
#[ts(skip)]
|
||
id: String,
|
||
summary: Vec<ReasoningItemReasoningSummary>,
|
||
#[serde(default, skip_serializing_if = "should_serialize_reasoning_content")]
|
||
#[ts(optional)]
|
||
content: Option<Vec<ReasoningItemContent>>,
|
||
encrypted_content: Option<String>,
|
||
},
|
||
LocalShellCall {
|
||
/// Set when using the chat completions API.
|
||
#[serde(default, skip_serializing)]
|
||
#[ts(skip)]
|
||
id: Option<String>,
|
||
/// Set when using the Responses API.
|
||
call_id: Option<String>,
|
||
status: LocalShellStatus,
|
||
action: LocalShellAction,
|
||
},
|
||
FunctionCall {
|
||
#[serde(default, skip_serializing)]
|
||
#[ts(skip)]
|
||
id: Option<String>,
|
||
name: String,
|
||
// The Responses API returns the function call arguments as a *string* that contains
|
||
// JSON, not as an already‑parsed object. We keep it as a raw string here and let
|
||
// Session::handle_function_call parse it into a Value. This exactly matches the
|
||
// Chat Completions + Responses API behavior.
|
||
arguments: String,
|
||
call_id: String,
|
||
},
|
||
// NOTE: The input schema for `function_call_output` objects that clients send to the
|
||
// OpenAI /v1/responses endpoint is NOT the same shape as the objects the server returns on the
|
||
// SSE stream. When *sending* we must wrap the string output inside an object that includes a
|
||
// required `success` boolean. To ensure we serialize exactly the expected shape we introduce
|
||
// a dedicated payload struct and flatten it here.
|
||
FunctionCallOutput {
|
||
call_id: String,
|
||
output: FunctionCallOutputPayload,
|
||
},
|
||
CustomToolCall {
|
||
#[serde(default, skip_serializing)]
|
||
#[ts(skip)]
|
||
id: Option<String>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
status: Option<String>,
|
||
|
||
call_id: String,
|
||
name: String,
|
||
input: String,
|
||
},
|
||
CustomToolCallOutput {
|
||
call_id: String,
|
||
output: String,
|
||
},
|
||
// Emitted by the Responses API when the agent triggers a web search.
|
||
// Example payload (from SSE `response.output_item.done`):
|
||
// {
|
||
// "id":"ws_...",
|
||
// "type":"web_search_call",
|
||
// "status":"completed",
|
||
// "action": {"type":"search","query":"weather: San Francisco, CA"}
|
||
// }
|
||
WebSearchCall {
|
||
#[serde(default, skip_serializing)]
|
||
#[ts(skip)]
|
||
id: Option<String>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
status: Option<String>,
|
||
action: WebSearchAction,
|
||
},
|
||
// Generated by the harness but considered exactly as a model response.
|
||
GhostSnapshot {
|
||
ghost_commit: GhostCommit,
|
||
},
|
||
#[serde(alias = "compaction_summary")]
|
||
Compaction {
|
||
encrypted_content: String,
|
||
},
|
||
#[serde(other)]
|
||
Other,
|
||
}
|
||
|
||
/// Developer-provided guidance that is injected into a turn as a developer role
|
||
/// message.
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(rename = "developer_instructions", rename_all = "snake_case")]
|
||
pub struct DeveloperInstructions {
|
||
text: String,
|
||
}
|
||
|
||
const APPROVAL_POLICY_NEVER: &str = include_str!("prompts/permissions/approval_policy/never.md");
|
||
const APPROVAL_POLICY_UNLESS_TRUSTED: &str =
|
||
include_str!("prompts/permissions/approval_policy/unless_trusted.md");
|
||
const APPROVAL_POLICY_ON_FAILURE: &str =
|
||
include_str!("prompts/permissions/approval_policy/on_failure.md");
|
||
const APPROVAL_POLICY_ON_REQUEST: &str =
|
||
include_str!("prompts/permissions/approval_policy/on_request.md");
|
||
|
||
const SANDBOX_MODE_DANGER_FULL_ACCESS: &str =
|
||
include_str!("prompts/permissions/sandbox_mode/danger_full_access.md");
|
||
const SANDBOX_MODE_WORKSPACE_WRITE: &str =
|
||
include_str!("prompts/permissions/sandbox_mode/workspace_write.md");
|
||
const SANDBOX_MODE_READ_ONLY: &str = include_str!("prompts/permissions/sandbox_mode/read_only.md");
|
||
|
||
impl DeveloperInstructions {
|
||
pub fn new<T: Into<String>>(text: T) -> Self {
|
||
Self { text: text.into() }
|
||
}
|
||
|
||
pub fn into_text(self) -> String {
|
||
self.text
|
||
}
|
||
|
||
pub fn concat(self, other: impl Into<DeveloperInstructions>) -> Self {
|
||
let mut text = self.text;
|
||
text.push_str(&other.into().text);
|
||
Self { text }
|
||
}
|
||
|
||
pub fn from_policy(
|
||
sandbox_policy: &SandboxPolicy,
|
||
approval_policy: AskForApproval,
|
||
cwd: &Path,
|
||
) -> Self {
|
||
let network_access = if sandbox_policy.has_full_network_access() {
|
||
NetworkAccess::Enabled
|
||
} else {
|
||
NetworkAccess::Restricted
|
||
};
|
||
|
||
let (sandbox_mode, writable_roots) = match sandbox_policy {
|
||
SandboxPolicy::DangerFullAccess => (SandboxMode::DangerFullAccess, None),
|
||
SandboxPolicy::ReadOnly => (SandboxMode::ReadOnly, None),
|
||
SandboxPolicy::ExternalSandbox { .. } => (SandboxMode::DangerFullAccess, None),
|
||
SandboxPolicy::WorkspaceWrite { .. } => {
|
||
let roots = sandbox_policy.get_writable_roots_with_cwd(cwd);
|
||
(SandboxMode::WorkspaceWrite, Some(roots))
|
||
}
|
||
};
|
||
|
||
DeveloperInstructions::from_permissions_with_network(
|
||
sandbox_mode,
|
||
network_access,
|
||
approval_policy,
|
||
writable_roots,
|
||
)
|
||
}
|
||
|
||
fn from_permissions_with_network(
|
||
sandbox_mode: SandboxMode,
|
||
network_access: NetworkAccess,
|
||
approval_policy: AskForApproval,
|
||
writable_roots: Option<Vec<WritableRoot>>,
|
||
) -> Self {
|
||
let start_tag = DeveloperInstructions::new("<permissions instructions>");
|
||
let end_tag = DeveloperInstructions::new("</permissions instructions>");
|
||
start_tag
|
||
.concat(DeveloperInstructions::sandbox_text(
|
||
sandbox_mode,
|
||
network_access,
|
||
))
|
||
.concat(DeveloperInstructions::from(approval_policy))
|
||
.concat(DeveloperInstructions::from_writable_roots(writable_roots))
|
||
.concat(end_tag)
|
||
}
|
||
|
||
fn from_writable_roots(writable_roots: Option<Vec<WritableRoot>>) -> Self {
|
||
let Some(roots) = writable_roots else {
|
||
return DeveloperInstructions::new("");
|
||
};
|
||
|
||
if roots.is_empty() {
|
||
return DeveloperInstructions::new("");
|
||
}
|
||
|
||
let roots_list: Vec<String> = roots
|
||
.iter()
|
||
.map(|r| format!("`{}`", r.root.to_string_lossy()))
|
||
.collect();
|
||
let text = if roots_list.len() == 1 {
|
||
format!(" The writable root is {}.", roots_list[0])
|
||
} else {
|
||
format!(" The writable roots are {}.", roots_list.join(", "))
|
||
};
|
||
DeveloperInstructions::new(text)
|
||
}
|
||
|
||
fn sandbox_text(mode: SandboxMode, network_access: NetworkAccess) -> DeveloperInstructions {
|
||
let template = match mode {
|
||
SandboxMode::DangerFullAccess => SANDBOX_MODE_DANGER_FULL_ACCESS.trim_end(),
|
||
SandboxMode::WorkspaceWrite => SANDBOX_MODE_WORKSPACE_WRITE.trim_end(),
|
||
SandboxMode::ReadOnly => SANDBOX_MODE_READ_ONLY.trim_end(),
|
||
};
|
||
let text = template.replace("{network_access}", &network_access.to_string());
|
||
|
||
DeveloperInstructions::new(text)
|
||
}
|
||
}
|
||
|
||
impl From<DeveloperInstructions> for ResponseItem {
|
||
fn from(di: DeveloperInstructions) -> Self {
|
||
ResponseItem::Message {
|
||
id: None,
|
||
role: "developer".to_string(),
|
||
content: vec![ContentItem::InputText {
|
||
text: di.into_text(),
|
||
}],
|
||
}
|
||
}
|
||
}
|
||
|
||
impl From<SandboxMode> for DeveloperInstructions {
|
||
fn from(mode: SandboxMode) -> Self {
|
||
let network_access = match mode {
|
||
SandboxMode::DangerFullAccess => NetworkAccess::Enabled,
|
||
SandboxMode::WorkspaceWrite | SandboxMode::ReadOnly => NetworkAccess::Restricted,
|
||
};
|
||
|
||
DeveloperInstructions::sandbox_text(mode, network_access)
|
||
}
|
||
}
|
||
|
||
impl From<AskForApproval> for DeveloperInstructions {
|
||
fn from(mode: AskForApproval) -> Self {
|
||
let text = match mode {
|
||
AskForApproval::Never => APPROVAL_POLICY_NEVER.trim_end(),
|
||
AskForApproval::UnlessTrusted => APPROVAL_POLICY_UNLESS_TRUSTED.trim_end(),
|
||
AskForApproval::OnFailure => APPROVAL_POLICY_ON_FAILURE.trim_end(),
|
||
AskForApproval::OnRequest => APPROVAL_POLICY_ON_REQUEST.trim_end(),
|
||
};
|
||
|
||
DeveloperInstructions::new(text)
|
||
}
|
||
}
|
||
|
||
fn should_serialize_reasoning_content(content: &Option<Vec<ReasoningItemContent>>) -> bool {
|
||
match content {
|
||
Some(content) => !content
|
||
.iter()
|
||
.any(|c| matches!(c, ReasoningItemContent::ReasoningText { .. })),
|
||
None => false,
|
||
}
|
||
}
|
||
|
||
fn local_image_error_placeholder(
|
||
path: &std::path::Path,
|
||
error: impl std::fmt::Display,
|
||
) -> ContentItem {
|
||
ContentItem::InputText {
|
||
text: format!(
|
||
"Codex could not read the local image at `{}`: {}",
|
||
path.display(),
|
||
error
|
||
),
|
||
}
|
||
}
|
||
|
||
/// Name of the tool used to view an image.
pub const VIEW_IMAGE_TOOL_NAME: &str = "view_image";

// Text markers placed around image content items. Unlabelled images use the
// bare open tag; local images use an open tag of the form
// `<image name=LABEL>`. Both share the same closing tag.
const IMAGE_OPEN_TAG: &str = "<image>";
const IMAGE_CLOSE_TAG: &str = "</image>";
const LOCAL_IMAGE_OPEN_TAG_PREFIX: &str = "<image name=";
const LOCAL_IMAGE_OPEN_TAG_SUFFIX: &str = ">";
const LOCAL_IMAGE_CLOSE_TAG: &str = IMAGE_CLOSE_TAG;
|
||
|
||
pub fn image_open_tag_text() -> String {
|
||
IMAGE_OPEN_TAG.to_string()
|
||
}
|
||
|
||
pub fn image_close_tag_text() -> String {
|
||
IMAGE_CLOSE_TAG.to_string()
|
||
}
|
||
|
||
/// Formats the label for a local image, e.g. `[Image #3]` for `3`.
pub fn local_image_label_text(label_number: usize) -> String {
    let mut label = String::from("[Image #");
    label.push_str(&label_number.to_string());
    label.push(']');
    label
}
|
||
|
||
pub fn local_image_open_tag_text(label_number: usize) -> String {
|
||
let label = local_image_label_text(label_number);
|
||
format!("{LOCAL_IMAGE_OPEN_TAG_PREFIX}{label}{LOCAL_IMAGE_OPEN_TAG_SUFFIX}")
|
||
}
|
||
|
||
pub fn is_local_image_open_tag_text(text: &str) -> bool {
|
||
text.strip_prefix(LOCAL_IMAGE_OPEN_TAG_PREFIX)
|
||
.is_some_and(|rest| rest.ends_with(LOCAL_IMAGE_OPEN_TAG_SUFFIX))
|
||
}
|
||
|
||
pub fn is_local_image_close_tag_text(text: &str) -> bool {
|
||
is_image_close_tag_text(text)
|
||
}
|
||
|
||
pub fn is_image_open_tag_text(text: &str) -> bool {
|
||
text == IMAGE_OPEN_TAG
|
||
}
|
||
|
||
pub fn is_image_close_tag_text(text: &str) -> bool {
|
||
text == IMAGE_CLOSE_TAG
|
||
}
|
||
|
||
fn invalid_image_error_placeholder(
|
||
path: &std::path::Path,
|
||
error: impl std::fmt::Display,
|
||
) -> ContentItem {
|
||
ContentItem::InputText {
|
||
text: format!(
|
||
"Image located at `{}` is invalid: {}",
|
||
path.display(),
|
||
error
|
||
),
|
||
}
|
||
}
|
||
|
||
fn unsupported_image_error_placeholder(path: &std::path::Path, mime: &str) -> ContentItem {
|
||
ContentItem::InputText {
|
||
text: format!(
|
||
"Codex cannot attach image at `{}`: unsupported image format `{}`.",
|
||
path.display(),
|
||
mime
|
||
),
|
||
}
|
||
}
|
||
|
||
pub fn local_image_content_items_with_label_number(
|
||
path: &std::path::Path,
|
||
label_number: Option<usize>,
|
||
) -> Vec<ContentItem> {
|
||
match load_and_resize_to_fit(path) {
|
||
Ok(image) => {
|
||
let mut items = Vec::with_capacity(3);
|
||
if let Some(label_number) = label_number {
|
||
items.push(ContentItem::InputText {
|
||
text: local_image_open_tag_text(label_number),
|
||
});
|
||
}
|
||
items.push(ContentItem::InputImage {
|
||
image_url: image.into_data_url(),
|
||
});
|
||
if label_number.is_some() {
|
||
items.push(ContentItem::InputText {
|
||
text: LOCAL_IMAGE_CLOSE_TAG.to_string(),
|
||
});
|
||
}
|
||
items
|
||
}
|
||
Err(err) => {
|
||
if matches!(&err, ImageProcessingError::Read { .. }) {
|
||
vec![local_image_error_placeholder(path, &err)]
|
||
} else if err.is_invalid_image() {
|
||
vec![invalid_image_error_placeholder(path, &err)]
|
||
} else {
|
||
let Some(mime_guess) = mime_guess::from_path(path).first() else {
|
||
return vec![local_image_error_placeholder(
|
||
path,
|
||
"unsupported MIME type (unknown)",
|
||
)];
|
||
};
|
||
let mime = mime_guess.essence_str().to_owned();
|
||
if !mime.starts_with("image/") {
|
||
return vec![local_image_error_placeholder(
|
||
path,
|
||
format!("unsupported MIME type `{mime}`"),
|
||
)];
|
||
}
|
||
vec![unsupported_image_error_placeholder(path, &mime)]
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
impl From<ResponseInputItem> for ResponseItem {
|
||
fn from(item: ResponseInputItem) -> Self {
|
||
match item {
|
||
ResponseInputItem::Message { role, content } => Self::Message {
|
||
role,
|
||
content,
|
||
id: None,
|
||
},
|
||
ResponseInputItem::FunctionCallOutput { call_id, output } => {
|
||
Self::FunctionCallOutput { call_id, output }
|
||
}
|
||
ResponseInputItem::McpToolCallOutput { call_id, result } => {
|
||
let output = match result {
|
||
Ok(result) => FunctionCallOutputPayload::from(&result),
|
||
Err(tool_call_err) => FunctionCallOutputPayload {
|
||
content: format!("err: {tool_call_err:?}"),
|
||
success: Some(false),
|
||
..Default::default()
|
||
},
|
||
};
|
||
Self::FunctionCallOutput { call_id, output }
|
||
}
|
||
ResponseInputItem::CustomToolCallOutput { call_id, output } => {
|
||
Self::CustomToolCallOutput { call_id, output }
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(rename_all = "snake_case")]
|
||
pub enum LocalShellStatus {
|
||
Completed,
|
||
InProgress,
|
||
Incomplete,
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum LocalShellAction {
|
||
Exec(LocalShellExecAction),
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
pub struct LocalShellExecAction {
|
||
pub command: Vec<String>,
|
||
pub timeout_ms: Option<u64>,
|
||
pub working_directory: Option<String>,
|
||
pub env: Option<HashMap<String, String>>,
|
||
pub user: Option<String>,
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum WebSearchAction {
|
||
Search {
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
query: Option<String>,
|
||
},
|
||
OpenPage {
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
url: Option<String>,
|
||
},
|
||
FindInPage {
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
url: Option<String>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
pattern: Option<String>,
|
||
},
|
||
|
||
#[serde(other)]
|
||
Other,
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum ReasoningItemReasoningSummary {
|
||
SummaryText { text: String },
|
||
}
|
||
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum ReasoningItemContent {
|
||
ReasoningText { text: String },
|
||
Text { text: String },
|
||
}
|
||
|
||
impl From<Vec<UserInput>> for ResponseInputItem {
|
||
fn from(items: Vec<UserInput>) -> Self {
|
||
let mut image_index = 0;
|
||
Self::Message {
|
||
role: "user".to_string(),
|
||
content: items
|
||
.into_iter()
|
||
.flat_map(|c| match c {
|
||
UserInput::Text { text } => vec![ContentItem::InputText { text }],
|
||
UserInput::Image { image_url } => vec![
|
||
ContentItem::InputText {
|
||
text: image_open_tag_text(),
|
||
},
|
||
ContentItem::InputImage { image_url },
|
||
ContentItem::InputText {
|
||
text: image_close_tag_text(),
|
||
},
|
||
],
|
||
UserInput::LocalImage { path } => {
|
||
image_index += 1;
|
||
local_image_content_items_with_label_number(&path, Some(image_index))
|
||
}
|
||
UserInput::Skill { .. } => Vec::new(), // Skill bodies are injected later in core
|
||
})
|
||
.collect::<Vec<ContentItem>>(),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// If the `name` of a `ResponseItem::FunctionCall` is either `container.exec`
|
||
/// or `shell`, the `arguments` field should deserialize to this struct.
|
||
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||
pub struct ShellToolCallParams {
|
||
pub command: Vec<String>,
|
||
pub workdir: Option<String>,
|
||
|
||
/// This is the maximum time in milliseconds that the command is allowed to run.
|
||
#[serde(alias = "timeout")]
|
||
pub timeout_ms: Option<u64>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
pub sandbox_permissions: Option<SandboxPermissions>,
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
pub justification: Option<String>,
|
||
}
|
||
|
||
/// If the `name` of a `ResponseItem::FunctionCall` is `shell_command`, the
|
||
/// `arguments` field should deserialize to this struct.
|
||
#[derive(Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||
pub struct ShellCommandToolCallParams {
|
||
pub command: String,
|
||
pub workdir: Option<String>,
|
||
|
||
/// Whether to run the shell with login shell semantics
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
pub login: Option<bool>,
|
||
/// This is the maximum time in milliseconds that the command is allowed to run.
|
||
#[serde(alias = "timeout")]
|
||
pub timeout_ms: Option<u64>,
|
||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||
#[ts(optional)]
|
||
pub sandbox_permissions: Option<SandboxPermissions>,
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
pub justification: Option<String>,
|
||
}
|
||
|
||
/// Responses API compatible content items that can be returned by a tool call.
|
||
/// This is a subset of ContentItem with the types we support as function call outputs.
|
||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, JsonSchema, TS)]
|
||
#[serde(tag = "type", rename_all = "snake_case")]
|
||
pub enum FunctionCallOutputContentItem {
|
||
// Do not rename, these are serialized and used directly in the responses API.
|
||
InputText { text: String },
|
||
// Do not rename, these are serialized and used directly in the responses API.
|
||
InputImage { image_url: String },
|
||
}
|
||
|
||
/// The payload we send back to OpenAI when reporting a tool call result.
|
||
///
|
||
/// `content` preserves the historical plain-string payload so downstream
|
||
/// integrations (tests, logging, etc.) can keep treating tool output as
|
||
/// `String`. When an MCP server returns richer data we additionally populate
|
||
/// `content_items` with the structured form that the Responses/Chat
|
||
/// Completions APIs understand.
|
||
#[derive(Debug, Default, Clone, PartialEq, JsonSchema, TS)]
|
||
pub struct FunctionCallOutputPayload {
|
||
pub content: String,
|
||
#[serde(skip_serializing_if = "Option::is_none")]
|
||
pub content_items: Option<Vec<FunctionCallOutputContentItem>>,
|
||
pub success: Option<bool>,
|
||
}
|
||
|
||
#[derive(Deserialize)]
|
||
#[serde(untagged)]
|
||
enum FunctionCallOutputPayloadSerde {
|
||
Text(String),
|
||
Items(Vec<FunctionCallOutputContentItem>),
|
||
}
|
||
|
||
// The Responses API expects two *different* shapes depending on success vs failure:
|
||
// • success → output is a plain string (no nested object)
|
||
// • failure → output is an object { content, success:false }
|
||
impl Serialize for FunctionCallOutputPayload {
|
||
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
|
||
where
|
||
S: Serializer,
|
||
{
|
||
if let Some(items) = &self.content_items {
|
||
items.serialize(serializer)
|
||
} else {
|
||
serializer.serialize_str(&self.content)
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<'de> Deserialize<'de> for FunctionCallOutputPayload {
|
||
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
|
||
where
|
||
D: Deserializer<'de>,
|
||
{
|
||
match FunctionCallOutputPayloadSerde::deserialize(deserializer)? {
|
||
FunctionCallOutputPayloadSerde::Text(content) => Ok(FunctionCallOutputPayload {
|
||
content,
|
||
..Default::default()
|
||
}),
|
||
FunctionCallOutputPayloadSerde::Items(items) => {
|
||
let content = serde_json::to_string(&items).map_err(serde::de::Error::custom)?;
|
||
Ok(FunctionCallOutputPayload {
|
||
content,
|
||
content_items: Some(items),
|
||
success: None,
|
||
})
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
impl From<&CallToolResult> for FunctionCallOutputPayload {
|
||
fn from(call_tool_result: &CallToolResult) -> Self {
|
||
let CallToolResult {
|
||
content,
|
||
structured_content,
|
||
is_error,
|
||
} = call_tool_result;
|
||
|
||
let is_success = is_error != &Some(true);
|
||
|
||
if let Some(structured_content) = structured_content
|
||
&& !structured_content.is_null()
|
||
{
|
||
match serde_json::to_string(structured_content) {
|
||
Ok(serialized_structured_content) => {
|
||
return FunctionCallOutputPayload {
|
||
content: serialized_structured_content,
|
||
success: Some(is_success),
|
||
..Default::default()
|
||
};
|
||
}
|
||
Err(err) => {
|
||
return FunctionCallOutputPayload {
|
||
content: err.to_string(),
|
||
success: Some(false),
|
||
..Default::default()
|
||
};
|
||
}
|
||
}
|
||
}
|
||
|
||
let serialized_content = match serde_json::to_string(content) {
|
||
Ok(serialized_content) => serialized_content,
|
||
Err(err) => {
|
||
return FunctionCallOutputPayload {
|
||
content: err.to_string(),
|
||
success: Some(false),
|
||
..Default::default()
|
||
};
|
||
}
|
||
};
|
||
|
||
let content_items = convert_content_blocks_to_items(content);
|
||
|
||
FunctionCallOutputPayload {
|
||
content: serialized_content,
|
||
content_items,
|
||
success: Some(is_success),
|
||
}
|
||
}
|
||
}
|
||
|
||
fn convert_content_blocks_to_items(
|
||
blocks: &[ContentBlock],
|
||
) -> Option<Vec<FunctionCallOutputContentItem>> {
|
||
let mut saw_image = false;
|
||
let mut items = Vec::with_capacity(blocks.len());
|
||
tracing::warn!("Blocks: {:?}", blocks);
|
||
for block in blocks {
|
||
match block {
|
||
ContentBlock::TextContent(text) => {
|
||
items.push(FunctionCallOutputContentItem::InputText {
|
||
text: text.text.clone(),
|
||
});
|
||
}
|
||
ContentBlock::ImageContent(image) => {
|
||
saw_image = true;
|
||
// Just in case the content doesn't include a data URL, add it.
|
||
let image_url = if image.data.starts_with("data:") {
|
||
image.data.clone()
|
||
} else {
|
||
format!("data:{};base64,{}", image.mime_type, image.data)
|
||
};
|
||
items.push(FunctionCallOutputContentItem::InputImage { image_url });
|
||
}
|
||
// TODO: render audio, resource, and embedded resource content to the model.
|
||
_ => return None,
|
||
}
|
||
}
|
||
|
||
if saw_image { Some(items) } else { None }
|
||
}
|
||
|
||
// Implement Display so callers can treat the payload like a plain string when logging or doing
|
||
// trivial substring checks in tests (existing tests call `.contains()` on the output). Display
|
||
// returns the raw `content` field.
|
||
|
||
impl std::fmt::Display for FunctionCallOutputPayload {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||
f.write_str(&self.content)
|
||
}
|
||
}
|
||
|
||
impl std::ops::Deref for FunctionCallOutputPayload {
|
||
type Target = str;
|
||
fn deref(&self) -> &Self::Target {
|
||
&self.content
|
||
}
|
||
}
|
||
|
||
// (Moved event mapping logic into codex-core to avoid coupling protocol to UI-facing events.)
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use super::*;
|
||
use crate::config_types::SandboxMode;
|
||
use crate::protocol::AskForApproval;
|
||
use anyhow::Result;
|
||
use mcp_types::ImageContent;
|
||
use mcp_types::TextContent;
|
||
use pretty_assertions::assert_eq;
|
||
use std::path::PathBuf;
|
||
use tempfile::tempdir;
|
||
|
||
#[test]
|
||
fn converts_sandbox_mode_into_developer_instructions() {
|
||
assert_eq!(
|
||
DeveloperInstructions::from(SandboxMode::WorkspaceWrite),
|
||
DeveloperInstructions::new(
|
||
"Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `workspace-write`: The sandbox permits reading files, and editing files in `cwd` and `writable_roots`. Editing files in other directories requires approval. Network access is restricted."
|
||
)
|
||
);
|
||
|
||
assert_eq!(
|
||
DeveloperInstructions::from(SandboxMode::ReadOnly),
|
||
DeveloperInstructions::new(
|
||
"Filesystem sandboxing defines which files can be read or written. `sandbox_mode` is `read-only`: The sandbox only permits reading files. Network access is restricted."
|
||
)
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn builds_permissions_with_network_access_override() {
|
||
let instructions = DeveloperInstructions::from_permissions_with_network(
|
||
SandboxMode::WorkspaceWrite,
|
||
NetworkAccess::Enabled,
|
||
AskForApproval::OnRequest,
|
||
None,
|
||
);
|
||
|
||
let text = instructions.into_text();
|
||
assert!(
|
||
text.contains("Network access is enabled."),
|
||
"expected network access to be enabled in message"
|
||
);
|
||
assert!(
|
||
text.contains("`approval_policy` is `on-request`"),
|
||
"expected approval guidance to be included"
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn builds_permissions_from_policy() {
|
||
let policy = SandboxPolicy::WorkspaceWrite {
|
||
writable_roots: vec![],
|
||
network_access: true,
|
||
exclude_tmpdir_env_var: false,
|
||
exclude_slash_tmp: false,
|
||
};
|
||
|
||
let instructions = DeveloperInstructions::from_policy(
|
||
&policy,
|
||
AskForApproval::UnlessTrusted,
|
||
&PathBuf::from("/tmp"),
|
||
);
|
||
let text = instructions.into_text();
|
||
assert!(text.contains("Network access is enabled."));
|
||
assert!(text.contains("`approval_policy` is `unless-trusted`"));
|
||
}
|
||
|
||
#[test]
|
||
fn serializes_success_as_plain_string() -> Result<()> {
|
||
let item = ResponseInputItem::FunctionCallOutput {
|
||
call_id: "call1".into(),
|
||
output: FunctionCallOutputPayload {
|
||
content: "ok".into(),
|
||
..Default::default()
|
||
},
|
||
};
|
||
|
||
let json = serde_json::to_string(&item)?;
|
||
let v: serde_json::Value = serde_json::from_str(&json)?;
|
||
|
||
// Success case -> output should be a plain string
|
||
assert_eq!(v.get("output").unwrap().as_str().unwrap(), "ok");
|
||
Ok(())
|
||
}
|
||
|
||
#[test]
|
||
fn serializes_failure_as_string() -> Result<()> {
|
||
let item = ResponseInputItem::FunctionCallOutput {
|
||
call_id: "call1".into(),
|
||
output: FunctionCallOutputPayload {
|
||
content: "bad".into(),
|
||
success: Some(false),
|
||
..Default::default()
|
||
},
|
||
};
|
||
|
||
let json = serde_json::to_string(&item)?;
|
||
let v: serde_json::Value = serde_json::from_str(&json)?;
|
||
|
||
assert_eq!(v.get("output").unwrap().as_str().unwrap(), "bad");
|
||
Ok(())
|
||
}
|
||
|
||
#[test]
|
||
fn serializes_image_outputs_as_array() -> Result<()> {
|
||
let call_tool_result = CallToolResult {
|
||
content: vec![
|
||
ContentBlock::TextContent(TextContent {
|
||
annotations: None,
|
||
text: "caption".into(),
|
||
r#type: "text".into(),
|
||
}),
|
||
ContentBlock::ImageContent(ImageContent {
|
||
annotations: None,
|
||
data: "BASE64".into(),
|
||
mime_type: "image/png".into(),
|
||
r#type: "image".into(),
|
||
}),
|
||
],
|
||
is_error: None,
|
||
structured_content: None,
|
||
};
|
||
|
||
let payload = FunctionCallOutputPayload::from(&call_tool_result);
|
||
assert_eq!(payload.success, Some(true));
|
||
let items = payload.content_items.clone().expect("content items");
|
||
assert_eq!(
|
||
items,
|
||
vec![
|
||
FunctionCallOutputContentItem::InputText {
|
||
text: "caption".into(),
|
||
},
|
||
FunctionCallOutputContentItem::InputImage {
|
||
image_url: "data:image/png;base64,BASE64".into(),
|
||
},
|
||
]
|
||
);
|
||
|
||
let item = ResponseInputItem::FunctionCallOutput {
|
||
call_id: "call1".into(),
|
||
output: payload,
|
||
};
|
||
|
||
let json = serde_json::to_string(&item)?;
|
||
let v: serde_json::Value = serde_json::from_str(&json)?;
|
||
|
||
let output = v.get("output").expect("output field");
|
||
assert!(output.is_array(), "expected array output");
|
||
|
||
Ok(())
|
||
}
|
||
|
||
#[test]
|
||
fn deserializes_array_payload_into_items() -> Result<()> {
|
||
let json = r#"[
|
||
{"type": "input_text", "text": "note"},
|
||
{"type": "input_image", "image_url": "data:image/png;base64,XYZ"}
|
||
]"#;
|
||
|
||
let payload: FunctionCallOutputPayload = serde_json::from_str(json)?;
|
||
|
||
assert_eq!(payload.success, None);
|
||
let expected_items = vec![
|
||
FunctionCallOutputContentItem::InputText {
|
||
text: "note".into(),
|
||
},
|
||
FunctionCallOutputContentItem::InputImage {
|
||
image_url: "data:image/png;base64,XYZ".into(),
|
||
},
|
||
];
|
||
assert_eq!(payload.content_items, Some(expected_items.clone()));
|
||
|
||
let expected_content = serde_json::to_string(&expected_items)?;
|
||
assert_eq!(payload.content, expected_content);
|
||
|
||
Ok(())
|
||
}
|
||
|
||
/// An item tagged `"type": "compaction_summary"` deserializes into
/// `ResponseItem::Compaction` carrying the given `encrypted_content`.
#[test]
fn deserializes_compaction_alias() -> Result<()> {
    let raw = r#"{"type":"compaction_summary","encrypted_content":"abc"}"#;

    let parsed = serde_json::from_str::<ResponseItem>(raw)?;
    let want = ResponseItem::Compaction {
        encrypted_content: "abc".into(),
    };

    assert_eq!(parsed, want);
    Ok(())
}
|
||
|
||
/// Each `web_search_call` JSON sample must parse into the expected
/// `ResponseItem::WebSearchCall` and serialize back to a value equal to the
/// original JSON.
#[test]
fn roundtrips_web_search_call_actions() -> Result<()> {
    // (input JSON, expected action, expected status)
    let samples = [
        (
            r#"{
                "type": "web_search_call",
                "status": "completed",
                "action": {
                    "type": "search",
                    "query": "weather seattle"
                }
            }"#,
            WebSearchAction::Search {
                query: Some("weather seattle".into()),
            },
            Some("completed".into()),
        ),
        (
            r#"{
                "type": "web_search_call",
                "status": "open",
                "action": {
                    "type": "open_page",
                    "url": "https://example.com"
                }
            }"#,
            WebSearchAction::OpenPage {
                url: Some("https://example.com".into()),
            },
            Some("open".into()),
        ),
        (
            r#"{
                "type": "web_search_call",
                "status": "in_progress",
                "action": {
                    "type": "find_in_page",
                    "url": "https://example.com/docs",
                    "pattern": "installation"
                }
            }"#,
            WebSearchAction::FindInPage {
                url: Some("https://example.com/docs".into()),
                pattern: Some("installation".into()),
            },
            Some("in_progress".into()),
        ),
    ];

    for (raw, action, status) in samples {
        // Deserialization side of the round trip.
        let parsed = serde_json::from_str::<ResponseItem>(raw)?;
        let want = ResponseItem::WebSearchCall {
            id: None,
            status,
            action,
        };
        assert_eq!(parsed, want);

        // Serialization side: compare as JSON values so whitespace in the
        // literal does not matter.
        let reserialized = serde_json::to_value(&parsed)?;
        let source_value = serde_json::from_str::<serde_json::Value>(raw)?;
        assert_eq!(reserialized, source_value);
    }

    Ok(())
}
|
||
|
||
/// Shell tool-call parameters deserialize from JSON, with the `timeout` key
/// populating `timeout_ms` and the absent optional fields left as `None`.
#[test]
fn deserialize_shell_tool_call_params() -> Result<()> {
    let raw = r#"{
        "command": ["ls", "-l"],
        "workdir": "/tmp",
        "timeout": 1000
    }"#;

    let want = ShellToolCallParams {
        command: vec![String::from("ls"), String::from("-l")],
        workdir: Some(String::from("/tmp")),
        timeout_ms: Some(1000),
        sandbox_permissions: None,
        justification: None,
    };
    let params = serde_json::from_str::<ShellToolCallParams>(raw)?;

    assert_eq!(want, params);
    Ok(())
}
|
||
|
||
/// Converting a single `UserInput::Image` yields a message whose content is
/// the image item surrounded by the open-tag and close-tag text items.
#[test]
fn wraps_image_user_input_with_tags() -> Result<()> {
    let image_url = "data:image/png;base64,abc".to_string();

    let converted = ResponseInputItem::from(vec![UserInput::Image {
        image_url: image_url.clone(),
    }]);

    // Anything other than a message is a test failure.
    let content = match converted {
        ResponseInputItem::Message { content, .. } => content,
        other => panic!("expected message response but got {other:?}"),
    };

    let want = vec![
        ContentItem::InputText {
            text: image_open_tag_text(),
        },
        ContentItem::InputImage { image_url },
        ContentItem::InputText {
            text: image_close_tag_text(),
        },
    ];
    assert_eq!(content, want);

    Ok(())
}
|
||
|
||
/// A `UserInput::LocalImage` pointing at a nonexistent file becomes a single
/// text placeholder that names the path and says it could not be read.
#[test]
fn local_image_read_error_adds_placeholder() -> Result<()> {
    let dir = tempdir()?;
    // Never created on disk, so reading it must fail.
    let missing_path = dir.path().join("missing-image.png");

    let converted = ResponseInputItem::from(vec![UserInput::LocalImage {
        path: missing_path.clone(),
    }]);

    let content = match converted {
        ResponseInputItem::Message { content, .. } => content,
        other => panic!("expected message response but got {other:?}"),
    };
    assert_eq!(content.len(), 1);

    let text = match &content[0] {
        ContentItem::InputText { text } => text,
        other => panic!("expected placeholder text but found {other:?}"),
    };

    let display_path = missing_path.display().to_string();
    assert!(
        text.contains(&display_path),
        "placeholder should mention missing path: {text}"
    );
    assert!(
        text.contains("could not read"),
        "placeholder should mention read issue: {text}"
    );

    Ok(())
}
|
||
|
||
/// A `UserInput::LocalImage` pointing at a JSON file becomes a single text
/// placeholder that reports the unsupported MIME type and names the path.
#[test]
fn local_image_non_image_adds_placeholder() -> Result<()> {
    let dir = tempdir()?;
    let json_path = dir.path().join("example.json");
    std::fs::write(&json_path, br#"{"hello":"world"}"#)?;

    let converted = ResponseInputItem::from(vec![UserInput::LocalImage {
        path: json_path.clone(),
    }]);

    let content = match converted {
        ResponseInputItem::Message { content, .. } => content,
        other => panic!("expected message response but got {other:?}"),
    };
    assert_eq!(content.len(), 1);

    let text = match &content[0] {
        ContentItem::InputText { text } => text,
        other => panic!("expected placeholder text but found {other:?}"),
    };

    assert!(
        text.contains("unsupported MIME type `application/json`"),
        "placeholder should mention unsupported MIME: {text}"
    );
    assert!(
        text.contains(&json_path.display().to_string()),
        "placeholder should mention path: {text}"
    );

    Ok(())
}
|
||
|
||
/// A `UserInput::LocalImage` pointing at an SVG file becomes a single text
/// placeholder with the exact "unsupported image format" message.
#[test]
fn local_image_unsupported_image_format_adds_placeholder() -> Result<()> {
    let dir = tempdir()?;
    let svg_path = dir.path().join("example.svg");
    // The literal's second line starts at column 0 on purpose: any leading
    // whitespace would become part of the file contents.
    std::fs::write(
        &svg_path,
        br#"<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" width="1" height="1"></svg>"#,
    )?;

    let converted = ResponseInputItem::from(vec![UserInput::LocalImage {
        path: svg_path.clone(),
    }]);

    let content = match converted {
        ResponseInputItem::Message { content, .. } => content,
        other => panic!("expected message response but got {other:?}"),
    };
    assert_eq!(content.len(), 1);

    let expected = format!(
        "Codex cannot attach image at `{}`: unsupported image format `image/svg+xml`.",
        svg_path.display()
    );
    let text = match &content[0] {
        ContentItem::InputText { text } => text,
        other => panic!("expected placeholder text but found {other:?}"),
    };
    assert_eq!(text, &expected);

    Ok(())
}
|
||
}
|