feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- It can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
//! Defines the protocol for a Codex session between a client and an agent.
//!
//! Uses a SQ (Submission Queue) / EQ (Event Queue) pattern to asynchronously
//! communicate between user and agent.

use std::collections::HashMap;
use std::ffi::OsStr;
use std::fmt;
use std::path::Path;
use std::path::PathBuf;
use std::str::FromStr;
use std::time::Duration;

use crate::ThreadId;
use crate::approvals::ElicitationRequestEvent;
use crate::config_types::CollaborationMode;
use crate::config_types::Personality;
use crate::config_types::ReasoningSummary as ReasoningSummaryConfig;
use crate::config_types::WindowsSandboxLevel;
use crate::custom_prompts::CustomPrompt;
use crate::dynamic_tools::DynamicToolCallRequest;
use crate::dynamic_tools::DynamicToolResponse;
use crate::items::TurnItem;
use crate::message_history::HistoryEntry;
use crate::models::BaseInstructions;
use crate::models::ContentItem;
use crate::models::ResponseItem;
use crate::models::WebSearchAction;
use crate::num_format::format_with_separators;
use crate::openai_models::ReasoningEffort as ReasoningEffortConfig;
use crate::parse_command::ParsedCommand;
use crate::plan_tool::UpdatePlanArgs;
use crate::request_user_input::RequestUserInputResponse;
use crate::user_input::UserInput;
use codex_utils_absolute_path::AbsolutePathBuf;
use mcp_types::CallToolResult;
use mcp_types::RequestId;
use mcp_types::Resource as McpResource;
use mcp_types::ResourceTemplate as McpResourceTemplate;
use mcp_types::Tool as McpTool;
use schemars::JsonSchema;
use serde::Deserialize;
use serde::Serialize;
use serde_json::Value;
use serde_with::serde_as;
use strum_macros::Display;
use tracing::error;
use ts_rs::TS;

pub use crate::approvals::ApplyPatchApprovalRequestEvent;
pub use crate::approvals::ElicitationAction;
pub use crate::approvals::ExecApprovalRequestEvent;
pub use crate::approvals::ExecPolicyAmendment;
pub use crate::request_user_input::RequestUserInputEvent;

/// Open/close tags for special user-input blocks. Used across crates to avoid
/// duplicated hardcoded strings.
pub const USER_INSTRUCTIONS_OPEN_TAG: &str = "<user_instructions>";
pub const USER_INSTRUCTIONS_CLOSE_TAG: &str = "</user_instructions>";
pub const ENVIRONMENT_CONTEXT_OPEN_TAG: &str = "<environment_context>";
pub const ENVIRONMENT_CONTEXT_CLOSE_TAG: &str = "</environment_context>";
pub const COLLABORATION_MODE_OPEN_TAG: &str = "<collaboration_mode>";
pub const COLLABORATION_MODE_CLOSE_TAG: &str = "</collaboration_mode>";
pub const USER_MESSAGE_BEGIN: &str = "## My request for Codex:";

/// Submission Queue Entry - requests from user
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct Submission {
    /// Unique id for this Submission to correlate with Events
    pub id: String,
    /// Payload
    pub op: Op,
}

/// Config payload for refreshing MCP servers.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema)]
pub struct McpServerRefreshConfig {
    pub mcp_servers: Value,
    pub mcp_oauth_credentials_store_mode: Value,
}

/// Submission operation
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema)]
#[serde(tag = "type", rename_all = "snake_case")]
#[allow(clippy::large_enum_variant)]
#[non_exhaustive]
pub enum Op {
    /// Abort current task.
    /// This server sends [`EventMsg::TurnAborted`] in response.
    Interrupt,

    /// Legacy user input.
    ///
    /// Prefer [`Op::UserTurn`] so the caller provides full turn context
    /// (cwd/approval/sandbox/model/etc.) for each turn.
    UserInput {
        /// User input items, see `InputItem`
        items: Vec<UserInput>,
        /// Optional JSON Schema used to constrain the final assistant message for this turn.
        #[serde(skip_serializing_if = "Option::is_none")]
        final_output_json_schema: Option<Value>,
    },

    /// Similar to [`Op::UserInput`], but contains additional context required
    /// for a turn of a [`crate::codex_thread::CodexThread`].
    UserTurn {
        /// User input items, see `InputItem`
        items: Vec<UserInput>,

        /// `cwd` to use with the [`SandboxPolicy`] and potentially tool calls
        /// such as `local_shell`.
        cwd: PathBuf,

        /// Policy to use for command approval.
        approval_policy: AskForApproval,

        /// Policy to use for tool calls such as `local_shell`.
        sandbox_policy: SandboxPolicy,

        /// Must be a valid model slug for the configured client session
        /// associated with this conversation.
        model: String,

        /// Will only be honored if the model is configured to use reasoning.
        #[serde(skip_serializing_if = "Option::is_none")]
        effort: Option<ReasoningEffortConfig>,

        /// Will only be honored if the model is configured to use reasoning.
        summary: ReasoningSummaryConfig,

        // The JSON schema to use for the final assistant message
        final_output_json_schema: Option<Value>,

        /// EXPERIMENTAL - set a pre-set collaboration mode.
        /// Takes precedence over model, effort, and developer instructions if set.
        #[serde(skip_serializing_if = "Option::is_none")]
        collaboration_mode: Option<CollaborationMode>,

        /// Optional personality override for this turn.
        #[serde(skip_serializing_if = "Option::is_none")]
        personality: Option<Personality>,
    },

    /// Override parts of the persistent turn context for subsequent turns.
    ///
    /// All fields are optional; when omitted, the existing value is preserved.
    /// This does not enqueue any input – it only updates defaults used for
    /// turns that rely on persistent session-level context (for example,
    /// [`Op::UserInput`]).
    OverrideTurnContext {
        /// Updated `cwd` for sandbox/tool calls.
        #[serde(skip_serializing_if = "Option::is_none")]
        cwd: Option<PathBuf>,

        /// Updated command approval policy.
        #[serde(skip_serializing_if = "Option::is_none")]
        approval_policy: Option<AskForApproval>,

        /// Updated sandbox policy for tool calls.
        #[serde(skip_serializing_if = "Option::is_none")]
        sandbox_policy: Option<SandboxPolicy>,

        /// Updated Windows sandbox mode for tool execution.
        #[serde(skip_serializing_if = "Option::is_none")]
        windows_sandbox_level: Option<WindowsSandboxLevel>,

        /// Updated model slug. When set, the model info is derived
        /// automatically.
        #[serde(skip_serializing_if = "Option::is_none")]
        model: Option<String>,

        /// Updated reasoning effort (honored only for reasoning-capable models).
        ///
        /// Use `Some(Some(_))` to set a specific effort, `Some(None)` to clear
        /// the effort, or `None` to leave the existing value unchanged.
        #[serde(skip_serializing_if = "Option::is_none")]
        effort: Option<Option<ReasoningEffortConfig>>,
|
2025-08-18 12:59:19 -07:00
|
|
|
|
|
|
|
|
|
|
/// Updated reasoning summary preference (honored only for reasoning-capable models).
|
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
summary: Option<ReasoningSummaryConfig>,
|
2026-01-16 21:51:25 -08:00
|
|
|
|
|
|
|
|
|
|
/// EXPERIMENTAL - set a pre-set collaboration mode.
|
|
|
|
|
|
/// Takes precedence over model, effort, and developer instructions if set.
|
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
collaboration_mode: Option<CollaborationMode>,
|
2026-01-22 12:04:23 -08:00
|
|
|
|
|
|
|
|
|
|
/// Updated personality preference.
|
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
personality: Option<Personality>,
|
2025-08-18 12:59:19 -07:00
|
|
|
|
},
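The double `Option` on `effort` encodes a tri-state update. A minimal sketch of how such an override can be applied to persistent context (illustrative only, with a hypothetical `apply_override` helper, not the actual implementation):

```rust
// Tri-state update semantics for Option<Option<T>> fields:
//   None          -> leave the existing value unchanged
//   Some(None)    -> clear the value
//   Some(Some(v)) -> set a new value
// Hypothetical helper, not the real codebase API.
fn apply_override<T>(current: &mut Option<T>, update: Option<Option<T>>) {
    if let Some(new_value) = update {
        // Some(Some(v)) sets a new value; Some(None) clears it.
        *current = new_value;
    }
    // None leaves `current` untouched.
}
```

Combined with `#[serde(skip_serializing_if = "Option::is_none")]`, an omitted JSON field maps to the outer `None` and leaves the session default in place.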

    /// Approve a command execution.
    ExecApproval {
        /// The id of the submission we are approving.
        id: String,
        /// The user's decision in response to the request.
        decision: ReviewDecision,
    },

    /// Approve a code patch.
    PatchApproval {
        /// The id of the submission we are approving.
        id: String,
        /// The user's decision in response to the request.
        decision: ReviewDecision,
    },
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (unlike the
TypeScript CLI's `$CODEX_HOME/history.json`, which must be rewritten in
full on each new entry to remain valid JSON). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. To provide some safety, though, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in the absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
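The append path described above can be sketched as follows. This is a simplified, hypothetical version (`append_history_line` is illustrative): it shows the JSONL append and the `0o600` permissions, but omits the advisory file locking that `codex-rs/core/src/message_history.rs` performs and does not do full JSON string escaping.

```rust
use std::fs::OpenOptions;
use std::io::Write;
use std::path::Path;

// Hypothetical sketch of appending one history entry as a JSONL line.
// The real implementation also takes an advisory file lock so that
// concurrent Codex CLI sessions can safely append to the same log.
fn append_history_line(path: &Path, text: &str) -> std::io::Result<()> {
    let mut opts = OpenOptions::new();
    opts.create(true).append(true);
    #[cfg(unix)]
    {
        use std::os::unix::fs::OpenOptionsExt;
        opts.mode(0o600); // keep other users from reading the history
    }
    let mut file = opts.open(path)?;
    // JSONL: one JSON object per line. `{:?}` gives a quoted, escaped
    // string, which approximates JSON escaping for simple text.
    writeln!(file, "{{\"text\":{:?}}}", text)
}
```

Because the file is opened in append mode, each call adds one line without rewriting existing entries.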

    /// Resolve an MCP elicitation request.
    ResolveElicitation {
        /// Name of the MCP server that issued the request.
        server_name: String,
        /// Request identifier from the MCP server.
        request_id: RequestId,
        /// User's decision for the request.
        decision: ElicitationAction,
    },
Feat: request user input tool (#9472)
### Summary
* Add a `requestUserInput` tool that the model can use to gather
feedback or ask questions mid-turn.
### Tool input schema
```
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "requestUserInput input",
"type": "object",
"additionalProperties": false,
"required": ["questions"],
"properties": {
"questions": {
"type": "array",
"description": "Questions to show the user (1-3). Prefer 1 unless multiple independent decisions block progress.",
"minItems": 1,
"maxItems": 3,
"items": {
"type": "object",
"additionalProperties": false,
"required": ["id", "header", "question"],
"properties": {
"id": {
"type": "string",
"description": "Stable identifier for mapping answers (snake_case)."
},
"header": {
"type": "string",
"description": "Short header label shown in the UI (12 or fewer chars)."
},
"question": {
"type": "string",
"description": "Single-sentence prompt shown to the user."
},
"options": {
"type": "array",
"description": "Optional 2-3 mutually exclusive choices. Put the recommended option first and suffix its label with \"(Recommended)\". Only include \"Other\" option if we want to include a free form option. If the question is free form in nature, do not include any option.",
"minItems": 2,
"maxItems": 3,
"items": {
"type": "object",
"additionalProperties": false,
"required": ["value", "label", "description"],
"properties": {
"value": {
"type": "string",
"description": "Machine-readable value (snake_case)."
},
"label": {
"type": "string",
"description": "User-facing label (1-5 words)."
},
"description": {
"type": "string",
"description": "One short sentence explaining impact/tradeoff if selected."
}
}
}
}
}
}
}
}
}
```
### Tool output schema
```
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "requestUserInput output",
"type": "object",
"additionalProperties": false,
"required": ["answers"],
"properties": {
"answers": {
"type": "object",
"description": "Map of question id to user answer.",
"additionalProperties": {
"type": "object",
"additionalProperties": false,
"required": ["selected"],
"properties": {
"selected": {
"type": "array",
"items": { "type": "string" }
},
"other": {
"type": ["string", "null"]
}
}
}
}
}
}
```
2026-01-19 10:17:30 -08:00
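As a rough illustration, the output schema above maps onto Rust shapes like the following. These are hypothetical mirror types for this document only; the real protocol definitions live elsewhere in the crate.

```rust
use std::collections::HashMap;

// Hypothetical mirror of the "requestUserInput output" schema: a map of
// question id to the user's answer.
#[derive(Debug, PartialEq)]
struct Answer {
    // Selected option values (snake_case), per the "selected" array.
    selected: Vec<String>,
    // Free-form text when the user picked "Other"; nullable in the schema.
    other: Option<String>,
}

// "answers" is an object keyed by question id.
type Answers = HashMap<String, Answer>;
```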

    /// Resolve a request_user_input tool call.
    #[serde(rename = "user_input_answer", alias = "request_user_input_response")]
    UserInputAnswer {
        /// Turn id for the in-flight request.
        id: String,
        /// User-provided answers.
        response: RequestUserInputResponse,
    },

    /// Resolve a dynamic tool call request.
    DynamicToolResponse {
        /// Call id for the in-flight request.
        id: String,
        /// Tool output payload.
        response: DynamicToolResponse,
    },

    /// Append an entry to the persistent cross-session message history.
    ///
    /// Note the entry is not guaranteed to be logged: the user may have
    /// history disabled, the text may match the list of "sensitive"
    /// patterns, etc.
    AddToHistory {
        /// The message text to be stored.
        text: String,
    },

    /// Request a single history entry identified by `log_id` + `offset`.
    GetHistoryEntryRequest { offset: usize, log_id: u64 },
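The backward-walking fetch scheme from the history commit message can be sketched as a client-side cursor. Names here are hypothetical; the sketch assumes the server reported `history_log_id` and `history_entry_count` in `SessionConfiguredEvent`.

```rust
// Hypothetical client-side cursor for paging backwards through the
// persistent history log, one entry at a time.
struct HistoryCursor {
    log_id: u64,        // the history_log_id reported at session start
    next_offset: usize, // starts at history_entry_count
}

impl HistoryCursor {
    /// Returns the (log_id, offset) pair to send in the next
    /// GetHistoryEntryRequest, or None once offset 0 has been consumed.
    fn next_request(&mut self) -> Option<(u64, usize)> {
        if self.next_offset == 0 {
            return None;
        }
        self.next_offset -= 1;
        Some((self.log_id, self.next_offset))
    }
}
```

Quoting the original `log_id` on every request is what keeps reads consistent if the file is later truncated and recreated under a new inode.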

    /// Request the list of MCP tools available across all configured servers.
    /// Reply is delivered via `EventMsg::McpListToolsResponse`.
    ListMcpTools,

    /// Request MCP servers to reinitialize and refresh cached tool lists.
    RefreshMcpServers { config: McpServerRefreshConfig },

    /// Request the list of available custom prompts.
    ListCustomPrompts,

    /// Request the list of skills for the provided `cwd` values or the session default.
    ListSkills {
        /// Working directories to scope repo skills discovery.
        ///
        /// When empty, the session default working directory is used.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        cwds: Vec<PathBuf>,

        /// When true, recompute skills even if a cached result exists.
        #[serde(default, skip_serializing_if = "std::ops::Not::not")]
        force_reload: bool,
    },

    /// Request the agent to summarize the current conversation context.
    ///
    /// The agent will use its existing context (either conversation history
    /// or a previous response id) to generate a summary, which will be
    /// returned as an AgentMessage event.
    Compact,
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
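As a rough guide, the structured findings payload above corresponds to Rust shapes like these. They are hypothetical mirrors for illustration; the actual `ReviewOutputEvent` types are defined in the protocol crate.

```rust
// Hypothetical mirrors of the review findings JSON shown above.
struct LineRange {
    start: u32,
    end: u32,
}

struct CodeLocation {
    absolute_file_path: String,
    line_range: LineRange,
}

struct Finding {
    title: String,         // <= 80 chars, imperative
    body: String,          // Markdown explaining *why*, citing files/lines
    confidence_score: f32, // 0.0-1.0
    priority: u8,          // 0-3
    code_location: CodeLocation,
}
```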

    /// Request Codex to undo a turn (turns are stacked, so this has the same effect as Cmd+Z).
    Undo,

    /// Request Codex to drop the last N user turns from in-memory context.
    ///
    /// This does not attempt to revert local filesystem changes. Clients are
    /// responsible for undoing any edits on disk.
    ThreadRollback { num_turns: u32 },

    /// Request a code review from the agent.
    Review { review_request: ReviewRequest },

    /// Request to shut down the codex instance.
    Shutdown,
feature: Add "!cmd" user shell execution (#2471)
This change lets users run local shell commands directly from the TUI by
prefixing their input with ! (e.g. !ls). Output is truncated to keep the
exec cell usable, and Ctrl-C cleanly
interrupts long-running commands (e.g. !sleep 10000).
**Summary of changes**
- Route Op::RunUserShellCommand through a dedicated UserShellCommandTask
(core/src/tasks/user_shell.rs), keeping the task logic out of codex.rs.
- Reuse the existing tool router: the task constructs a ToolCall for the
local_shell tool and relies on ShellHandler, so no manual MCP tool
lookup is required.
- Emit exec lifecycle events (ExecCommandBegin/ExecCommandEnd) so the
TUI can show command metadata, live output, and exit status.
**End-to-end flow**
**TUI handling**
1. ChatWidget::submit_user_message (TUI) intercepts messages starting
with !.
2. Non-empty commands dispatch Op::RunUserShellCommand { command };
empty commands surface a help hint.
3. No UserInput items are created, so nothing is enqueued for the model.
**Core submission loop**
4. The submission loop routes the op to handlers::run_user_shell_command
(core/src/codex.rs).
5. A fresh TurnContext is created and Session::spawn_user_shell_command
enqueues UserShellCommandTask.
**Task execution**
6. UserShellCommandTask::run emits TaskStartedEvent, formats the
command, and prepares a ToolCall targeting local_shell.
7. ToolCallRuntime::handle_tool_call dispatches to ShellHandler.
**Shell tool runtime**
8. ShellHandler::run_exec_like launches the process via the unified exec
runtime, honoring sandbox and shell policies, and emits
ExecCommandBegin/End.
9. Stdout/stderr are captured for the UI, but the task does not turn the
resulting ToolOutput into a model response.
**Completion**
10. After ExecCommandEnd, the task finishes without an assistant
message; the session marks it complete and the exec cell displays the
final output.
**Conversation context**
- The command and its output never enter the conversation history or the
model prompt; the flow is local-only.
- Only exec/task events are emitted for UI rendering.
**Demo video**
https://github.com/user-attachments/assets/fcd114b0-4304-4448-a367-a04c43e0b996
2025-10-29 00:31:20 -07:00
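The TUI interception in steps 1-3 above can be sketched like this. The `Dispatch` enum and `route_input` function are hypothetical names for this document; the real logic lives in `ChatWidget::submit_user_message`.

```rust
// Hypothetical sketch: input starting with '!' becomes a shell dispatch
// (Op::RunUserShellCommand) instead of being enqueued for the model.
#[derive(Debug, PartialEq)]
enum Dispatch {
    RunShell(String), // maps to Op::RunUserShellCommand { command }
    HelpHint,         // bare "!" surfaces a usage hint
    Model(String),    // everything else becomes model input
}

fn route_input(input: &str) -> Dispatch {
    match input.strip_prefix('!') {
        Some(cmd) if !cmd.trim().is_empty() => Dispatch::RunShell(cmd.to_string()),
        Some(_) => Dispatch::HelpHint,
        None => Dispatch::Model(input.to_string()),
    }
}
```

Because shell dispatches never produce `UserInput` items, the command and its output stay out of the conversation history, as the commit message notes.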

    /// Execute a user-initiated one-off shell command (triggered by "!cmd").
    ///
    /// The command string is executed using the user's default shell and may
    /// include shell syntax (pipes, redirects, etc.). Output is streamed via
    /// `ExecCommand*` events and the UI regains control upon `TurnComplete`.
    RunUserShellCommand {
        /// The raw command string after '!'.
        command: String,
    },

    /// Request the list of available models.
    ListModels,
}

/// Determines the conditions under which the user is consulted to approve
/// running the command proposed by Codex.
#[derive(
    Debug,
    Clone,
    Copy,
    Default,
    PartialEq,
    Eq,
    Hash,
    Serialize,
    Deserialize,
    Display,
    JsonSchema,
    TS,
)]
#[serde(rename_all = "kebab-case")]
#[strum(serialize_all = "kebab-case")]
pub enum AskForApproval {
    /// Under this policy, only "known safe" commands—as determined by
    /// `is_safe_command()`—that **only read files** are auto‑approved.
    /// Everything else will ask the user to approve.
    #[serde(rename = "untrusted")]
    #[strum(serialize = "untrusted")]
    UnlessTrusted,
|
    /// *All* commands are auto‑approved, but they are expected to run inside a
    /// sandbox where network access is disabled and writes are confined to a
    /// specific set of paths. If the command fails, it will be escalated to
    /// the user to approve execution without a sandbox.
    OnFailure,
    /// The model decides when to ask the user for approval.
    #[default]
    OnRequest,
    /// Never ask the user to approve commands. Failures are immediately returned
    /// to the model, and never escalated to the user for approval.
    Never,
}
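The variant docs above can be read as a decision rule for what happens when a sandboxed command fails. The following is a standalone sketch, not the crate's API: the mappings for `UnlessTrusted` and `OnRequest` are assumptions drawn from the doc comments, not from the actual agent loop.

```rust
// Hypothetical sketch: the approval policy as a "does a sandboxed
// failure escalate to the user?" predicate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AskForApproval {
    UnlessTrusted,
    OnFailure,
    OnRequest,
    Never,
}

pub fn escalates_on_failure(policy: AskForApproval) -> bool {
    match policy {
        // Untrusted commands are approved up front, so a failure can
        // reasonably be surfaced for re-approval without the sandbox
        // (assumption, not confirmed by the source).
        AskForApproval::UnlessTrusted => true,
        // `OnFailure` escalates failed sandboxed commands by definition.
        AskForApproval::OnFailure => true,
        // The model, not the harness, decides when to ask.
        AskForApproval::OnRequest => false,
        // Failures go straight back to the model, never to the user.
        AskForApproval::Never => false,
    }
}

fn main() {
    assert!(escalates_on_failure(AskForApproval::OnFailure));
    assert!(!escalates_on_failure(AskForApproval::Never));
    println!("ok");
}
```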
/// Represents whether outbound network access is available to the agent.
#[derive(
    Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Display, Default, JsonSchema, TS,
)]
#[serde(rename_all = "kebab-case")]
#[strum(serialize_all = "kebab-case")]
pub enum NetworkAccess {
    #[default]
    Restricted,
    Enabled,
}

impl NetworkAccess {
    pub fn is_enabled(self) -> bool {
        matches!(self, NetworkAccess::Enabled)
    }
}
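A minimal sketch of what the derive attributes on `NetworkAccess` produce: with `rename_all = "kebab-case"` on both serde and strum, the variants map to the strings `"restricted"` and `"enabled"`. The hand-written `as_str` below only mirrors that mapping for illustration; the real crate gets it from the derives.

```rust
// Standalone mirror of NetworkAccess without the derive machinery.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum NetworkAccess {
    #[default]
    Restricted,
    Enabled,
}

impl NetworkAccess {
    // Mirrors the kebab-case names the serde/strum derives would emit.
    pub fn as_str(self) -> &'static str {
        match self {
            NetworkAccess::Restricted => "restricted",
            NetworkAccess::Enabled => "enabled",
        }
    }

    pub fn is_enabled(self) -> bool {
        matches!(self, NetworkAccess::Enabled)
    }
}

fn main() {
    // The default is the restrictive setting, so network starts disabled.
    assert_eq!(NetworkAccess::default().as_str(), "restricted");
    assert!(!NetworkAccess::default().is_enabled());
    assert!(NetworkAccess::Enabled.is_enabled());
    println!("ok");
}
```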
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
/// Determines execution restrictions for model shell commands.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Display, JsonSchema, TS)]
#[strum(serialize_all = "kebab-case")]
#[serde(tag = "type", rename_all = "kebab-case")]
pub enum SandboxPolicy {
    /// No restrictions whatsoever. Use with caution.
    #[serde(rename = "danger-full-access")]
    DangerFullAccess,

    /// Read-only access to the entire file-system.
    #[serde(rename = "read-only")]
    ReadOnly,
    /// Indicates the process is already in an external sandbox. Allows full
    /// disk access while honoring the provided network setting.
    #[serde(rename = "external-sandbox")]
    ExternalSandbox {
        /// Whether the external sandbox permits outbound network traffic.
        #[serde(default)]
        network_access: NetworkAccess,
    },
    /// Same as `ReadOnly` but additionally grants write access to the current
    /// working directory ("workspace").
    #[serde(rename = "workspace-write")]
    WorkspaceWrite {
        /// Additional folders (beyond cwd and possibly TMPDIR) that should be
        /// writable from within the sandbox.
        #[serde(default, skip_serializing_if = "Vec::is_empty")]
        writable_roots: Vec<AbsolutePathBuf>,
        /// When set to `true`, outbound network access is allowed. `false` by
        /// default.
        #[serde(default)]
        network_access: bool,

        /// When set to `true`, will NOT include the per-user `TMPDIR`
        /// environment variable among the default writable roots. Defaults to
        /// `false`.
        #[serde(default)]
        exclude_tmpdir_env_var: bool,

        /// When set to `true`, will NOT include `/tmp` among the default
        /// writable roots on UNIX. Defaults to `false`.
        #[serde(default)]
        exclude_slash_tmp: bool,
    },
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
}
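With `#[serde(tag = "type", rename_all = "kebab-case")]`, each `SandboxPolicy` variant serializes as an internally tagged object, and the `FromStr` impl for `SandboxPolicy` accepts exactly this JSON form via `serde_json`. A sketch of the expected shapes, one per line (the `/home/user/project` path is purely illustrative):

```json
{ "type": "read-only" }
{ "type": "danger-full-access" }
{ "type": "external-sandbox", "network_access": "enabled" }
{ "type": "workspace-write", "writable_roots": ["/home/user/project"], "network_access": true }
```

Note the asymmetry: `external-sandbox` carries a kebab-case `NetworkAccess` string, while `workspace-write` uses a plain boolean `network_access`.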
/// A writable root path accompanied by a list of subpaths that should remain
/// read‑only even when the root is writable. This is primarily used to ensure
feat: if .codex is a sub-folder of a writable root, then make it read-only to the sandbox (#8088)
In preparation for in-repo configuration support, this updates
`WritableRoot::get_writable_roots_with_cwd()` to include the `.codex`
subfolder in `WritableRoot.read_only_subpaths`, if it exists, as we
already do for `.git`.
As noted, currently, like `.git`, `.codex` will only be read-only under
macOS Seatbelt, but we plan to bring support to other OSes, as well.
Updated the integration test in `seatbelt.rs` so that it actually
attempts to run the generated Seatbelt commands, verifying that:
- trying to write to `.codex/config.toml` in a writable root fails
- trying to write to `.git/hooks/pre-commit` in a writable root fails
- trying to write to the writable root containing the `.codex` and
`.git` subfolders succeeds
2025-12-15 22:54:43 -08:00
/// that folders containing files that could be modified to escalate the
/// privileges of the agent (e.g. `.codex`, `.git`, notably `.git/hooks`) under
/// a writable root are not modified by the agent.
#[derive(Debug, Clone, PartialEq, Eq, JsonSchema)]
pub struct WritableRoot {
    pub root: AbsolutePathBuf,
fix: tighten up checks against writable folders for SandboxPolicy (#2338)
I was looking at the implementation of `Session::get_writable_roots()`,
which did not seem right, as it was a copy of writable roots, which is
not guaranteed to be in sync with the `sandbox_policy` field.
I looked at who was calling `get_writable_roots()` and its only call
site was `apply_patch()` in `codex-rs/core/src/apply_patch.rs`, which
took the roots and forwarded them to `assess_patch_safety()` in
`safety.rs`. I updated `assess_patch_safety()` to take `sandbox_policy:
&SandboxPolicy` instead of `writable_roots: &[PathBuf]` (and replaced
`Session::get_writable_roots()` with `Session::get_sandbox_policy()`).
Within `safety.rs`, it was fairly easy to update
`is_write_patch_constrained_to_writable_paths()` to work with
`SandboxPolicy`, and in particular, it is far more accurate because, for
better or worse, `SandboxPolicy::get_writable_roots_with_cwd()` _returns
an empty vec_ for `SandboxPolicy::DangerFullAccess`, suggesting that
_nothing_ is writable when in reality _everything_ is writable. With
this PR, `is_write_patch_constrained_to_writable_paths()` now does the
right thing for each variant of `SandboxPolicy`.
I thought this would be the end of the story, but it turned out that
`test_writable_roots_constraint()` in `safety.rs` needed to be updated,
as well. In particular, the test was writing to
`std::env::current_dir()` instead of a `TempDir`, which I suspect was a
holdover from earlier when `SandboxPolicy::WorkspaceWrite` would always
make `TMPDIR` writable on macOS, which made it hard to write tests to
verify `SandboxPolicy` in `TMPDIR`. Fortunately, we now have
`exclude_tmpdir_env_var` as an option on
`SandboxPolicy::WorkspaceWrite`, so I was able to update the test to
preserve the existing behavior, but to no longer write to
`std::env::current_dir()`.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/2338).
* #2345
* #2329
* #2343
* #2340
* __->__ #2338
2025-08-15 09:06:15 -07:00
    /// By construction, these subpaths are all under `root`.
    pub read_only_subpaths: Vec<AbsolutePathBuf>,
}
impl WritableRoot {
    pub fn is_path_writable(&self, path: &Path) -> bool {
        // Check if the path is under the root.
        if !path.starts_with(&self.root) {
            return false;
        }

        // Check if the path is under any of the read-only subpaths.
        for subpath in &self.read_only_subpaths {
            if path.starts_with(subpath) {
                return false;
            }
        }

        true
    }
}
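The containment check above relies on `Path::starts_with` matching whole path components, not string prefixes. A standalone sketch, substituting plain `PathBuf` for the crate's `AbsolutePathBuf`:

```rust
use std::path::{Path, PathBuf};

struct WritableRoot {
    root: PathBuf,
    read_only_subpaths: Vec<PathBuf>,
}

impl WritableRoot {
    fn is_path_writable(&self, path: &Path) -> bool {
        // Must be under the root at all...
        if !path.starts_with(&self.root) {
            return false;
        }
        // ...and under none of the read-only subpaths.
        self.read_only_subpaths
            .iter()
            .all(|subpath| !path.starts_with(subpath))
    }
}

fn main() {
    let root = WritableRoot {
        root: PathBuf::from("/repo"),
        read_only_subpaths: vec![PathBuf::from("/repo/.git"), PathBuf::from("/repo/.codex")],
    };
    assert!(root.is_path_writable(Path::new("/repo/src/main.rs")));
    assert!(!root.is_path_writable(Path::new("/repo/.git/hooks/pre-commit")));
    // Component-wise matching: `.gitignore` is not under `.git`.
    assert!(root.is_path_writable(Path::new("/repo/.gitignore")));
    assert!(!root.is_path_writable(Path::new("/elsewhere/file")));
    println!("ok");
}
```

Because `starts_with` compares components, `/repo/.gitignore` is not treated as being under the read-only subpath `/repo/.git`, which a naive string-prefix check would get wrong.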
impl FromStr for SandboxPolicy {
    type Err = serde_json::Error;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        serde_json::from_str(s)
    }
}

impl SandboxPolicy {
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
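The flag-to-mode mapping the commit message describes (no flags => `read-only`, `--full-auto` => `workspace-write`, no flag for `danger-full-access`) can be sketched as follows. This is a minimal illustration only; `SandboxMode` and `mode_from_flags` are hypothetical names, not the actual codex-rs definitions.

```rust
// Hypothetical sketch of the CLI-flag-to-sandbox-mode mapping described above.
#[derive(Debug, PartialEq)]
#[allow(dead_code)]
enum SandboxMode {
    ReadOnly,
    WorkspaceWrite,
    DangerFullAccess,
}

fn mode_from_flags(full_auto: bool) -> SandboxMode {
    // `danger-full-access` is intentionally unreachable from flags; see
    // https://github.com/openai/codex/issues/1254 for revisiting that.
    if full_auto {
        SandboxMode::WorkspaceWrite
    } else {
        SandboxMode::ReadOnly
    }
}

fn main() {
    assert_eq!(mode_from_flags(false), SandboxMode::ReadOnly);
    assert_eq!(mode_from_flags(true), SandboxMode::WorkspaceWrite);
}
```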
/// Returns a policy with read-only disk access and no network.
fix: overhaul SandboxPolicy and config loading in Rust (#732)
Previous to this PR, `SandboxPolicy` was a bit difficult to work with:
https://github.com/openai/codex/blob/237f8a11e11fdcc793a09e787e48215676d9b95b/codex-rs/core/src/protocol.rs#L98-L108
Specifically:
* It was an `enum` and therefore options were mutually exclusive as
opposed to additive.
* It defined things in terms of what the agent _could not_ do as opposed
to what they _could_ do. This made things hard to support because we
would prefer to build up a sandbox config by starting with something
extremely restrictive and only granting permissions for things the user
has explicitly allowed.
This PR changes things substantially by redefining the policy in terms
of two concepts:
* A `SandboxPermission` enum that defines permissions that can be
granted to the agent/sandbox.
* A `SandboxPolicy` that internally stores a `Vec<SandboxPermission>`,
but externally exposes a simpler API that can be used to configure
Seatbelt/Landlock.
Previous to this PR, we supported a `--sandbox` flag that effectively
mapped to an enum value in `SandboxPolicy`. Though now that
`SandboxPolicy` is a wrapper around `Vec<SandboxPermission>`, the single
`--sandbox` flag no longer makes sense. While I could have turned it
into a flag that the user can specify multiple times, I think the
current values to use with such a flag are long and potentially messy,
so for the moment, I have dropped support for `--sandbox` altogether and
we can bring it back once we have figured out the naming thing.
Since `--sandbox` is gone, users now have to specify `--full-auto` to
get a sandbox that allows writes in `cwd`. Admittedly, there is no clean
way to specify the equivalent of `--full-auto` in your `config.toml`
right now, so we will have to revisit that, as well.
Because `Config` presents a `SandboxPolicy` field and `SandboxPolicy`
changed considerably, I had to overhaul how config loading works, as
well. There are now two distinct concepts, `ConfigToml` and `Config`:
* `ConfigToml` is the deserialization of `~/.codex/config.toml`. As one
might expect, every field is `Optional` and it is `#[derive(Deserialize,
Default)]`. Consistent use of `Optional` makes it clear what the user
has specified explicitly.
* `Config` is the "normalized config" and is produced by merging
`ConfigToml` with `ConfigOverrides`. Where `ConfigToml` contains a raw
`Option<Vec<SandboxPermission>>`, `Config` presents only the final
`SandboxPolicy`.
The changes to `core/src/exec.rs` and `core/src/linux.rs` merit extra
special attention to ensure we are faithfully mapping the
`SandboxPolicy` to the Seatbelt and Landlock configs, respectively.
Also, take note that `core/src/seatbelt_readonly_policy.sbpl` has been
renamed to `codex-rs/core/src/seatbelt_base_policy.sbpl` and that
`(allow file-read*)` has been removed from the `.sbpl` file as now this
is added to the policy in `core/src/exec.rs` when
`sandbox_policy.has_full_disk_read_access()` is `true`.
2025-04-29 15:01:16 -07:00
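The `ConfigToml` vs. `Config` split described above follows a common normalization pattern: deserialize every field as `Option`, then merge with overrides into final values. A simplified sketch under assumed names (the real structs carry many more fields than the single illustrative one here):

```rust
// Simplified sketch of the ConfigToml -> Config normalization described above.
#[derive(Default)]
struct ConfigToml {
    full_auto: Option<bool>, // every field is Optional in the deserialized TOML
}

struct ConfigOverrides {
    full_auto: Option<bool>, // values supplied on the command line
}

struct Config {
    full_auto: bool, // the normalized config holds only final values
}

fn merge(toml: ConfigToml, overrides: ConfigOverrides) -> Config {
    Config {
        // An explicit override wins, then the TOML value, then a default.
        full_auto: overrides.full_auto.or(toml.full_auto).unwrap_or(false),
    }
}

fn main() {
    let cfg = merge(
        ConfigToml::default(),
        ConfigOverrides { full_auto: Some(true) },
    );
    assert!(cfg.full_auto);
}
```

Consistent use of `Option` in `ConfigToml` is what makes it possible to tell "the user explicitly set this" apart from "the default applied".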
pub fn new_read_only_policy() -> Self {
SandboxPolicy::ReadOnly
}
/// Returns a policy that can read the entire disk, but can only write to
/// the current working directory and the per-user tmp dir on macOS. It does
/// not allow network access.
pub fn new_workspace_write_policy() -> Self {
SandboxPolicy::WorkspaceWrite {
writable_roots: vec![],
network_access: false,
exclude_tmpdir_env_var: false,
exclude_slash_tmp: false,
}
}
/// Always returns `true`; restricting read access is not supported.
pub fn has_full_disk_read_access(&self) -> bool {
true
}
pub fn has_full_disk_write_access(&self) -> bool {
match self {
SandboxPolicy::DangerFullAccess => true,
SandboxPolicy::ExternalSandbox { .. } => true,
SandboxPolicy::ReadOnly => false,
SandboxPolicy::WorkspaceWrite { .. } => false,
}
}
pub fn has_full_network_access(&self) -> bool {
match self {
SandboxPolicy::DangerFullAccess => true,
SandboxPolicy::ExternalSandbox { network_access } => network_access.is_enabled(),
SandboxPolicy::ReadOnly => false,
SandboxPolicy::WorkspaceWrite { network_access, .. } => *network_access,
}
}
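Taken together, these accessors let callers ask what a policy can do rather than matching on its variants. A minimal usage sketch against a simplified stand-in enum (not the full `SandboxPolicy`, which carries additional fields such as `writable_roots`):

```rust
// Simplified stand-in for SandboxPolicy to illustrate the accessor pattern above.
enum Policy {
    ReadOnly,
    WorkspaceWrite { network_access: bool },
    DangerFullAccess,
}

impl Policy {
    fn has_full_network_access(&self) -> bool {
        match self {
            Policy::DangerFullAccess => true,
            Policy::ReadOnly => false,
            Policy::WorkspaceWrite { network_access } => *network_access,
        }
    }
}

fn main() {
    // Callers branch on capabilities, not variants.
    assert!(!Policy::ReadOnly.has_full_network_access());
    assert!(Policy::WorkspaceWrite { network_access: true }.has_full_network_access());
    assert!(Policy::DangerFullAccess.has_full_network_access());
}
```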
/// Returns the list of writable roots (tailored to the current working
/// directory) together with subpaths that should remain read‑only under
/// each writable root.
pub fn get_writable_roots_with_cwd(&self, cwd: &Path) -> Vec<WritableRoot> {
|
feat: redesign sandbox config (#1373)
This is a major redesign of how sandbox configuration works and aims to
fix https://github.com/openai/codex/issues/1248. Specifically, it
replaces `sandbox_permissions` in `config.toml` (and the
`-s`/`--sandbox-permission` CLI flags) with a "table" with effectively
three variants:
```toml
# Safest option: full disk is read-only, but writes and network access are disallowed.
[sandbox]
mode = "read-only"
# The cwd of the Codex task is writable, as well as $TMPDIR on macOS.
# writable_roots can be used to specify additional writable folders.
[sandbox]
mode = "workspace-write"
writable_roots = [] # Optional, defaults to the empty list.
network_access = false # Optional, defaults to false.
# Disable sandboxing: use at your own risk!!!
[sandbox]
mode = "danger-full-access"
```
This should make sandboxing easier to reason about. While we have
dropped support for `-s`, the way it works now is:
- no flags => `read-only`
- `--full-auto` => `workspace-write`
- currently, there is no way to specify `danger-full-access` via a CLI
flag, but we will revisit that as part of
https://github.com/openai/codex/issues/1254
Outstanding issue:
- As noted in the `TODO` on `SandboxPolicy::is_unrestricted()`, we are
still conflating sandbox preferences with approval preferences in that
case, which needs to be cleaned up.
2025-06-24 16:59:47 -07:00
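The three-variant table above maps naturally onto an enum. Below is a minimal, stdlib-only sketch of that mapping — not the real `SandboxPolicy` in this file, which carries more fields and is deserialized via serde/TOML; the names here are illustrative:

```rust
/// Simplified stand-in for the real `SandboxPolicy` defined in this file.
#[derive(Debug, PartialEq)]
enum SandboxMode {
    ReadOnly,
    WorkspaceWrite {
        writable_roots: Vec<String>,
        network_access: bool,
    },
    DangerFullAccess,
}

/// Map the `mode` string from the `[sandbox]` table to a policy,
/// applying the documented defaults for the optional keys.
fn parse_mode(mode: &str) -> Option<SandboxMode> {
    match mode {
        "read-only" => Some(SandboxMode::ReadOnly),
        // writable_roots defaults to the empty list, network_access to false.
        "workspace-write" => Some(SandboxMode::WorkspaceWrite {
            writable_roots: Vec::new(),
            network_access: false,
        }),
        "danger-full-access" => Some(SandboxMode::DangerFullAccess),
        _ => None,
    }
}

fn main() {
    // No flags => read-only; --full-auto => workspace-write.
    assert_eq!(parse_mode("read-only"), Some(SandboxMode::ReadOnly));
    assert!(parse_mode("bogus").is_none());
    println!("ok");
}
```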
        match self {
            SandboxPolicy::DangerFullAccess => Vec::new(),
            SandboxPolicy::ExternalSandbox { .. } => Vec::new(),
            SandboxPolicy::ReadOnly => Vec::new(),
            SandboxPolicy::WorkspaceWrite {
                writable_roots,
                exclude_tmpdir_env_var,
                exclude_slash_tmp,
                network_access: _,
            } => {
                // Start from explicitly configured writable roots.
                let mut roots: Vec<AbsolutePathBuf> = writable_roots.clone();

                // Always include defaults: cwd, /tmp (if present on Unix), and
                // on macOS, the per-user TMPDIR unless explicitly excluded.
                // TODO(mbolin): cwd param should be AbsolutePathBuf.
                let cwd_absolute = AbsolutePathBuf::from_absolute_path(cwd);
                match cwd_absolute {
                    Ok(cwd) => {
                        roots.push(cwd);
                    }
                    Err(e) => {
                        error!(
                            "Ignoring invalid cwd {:?} for sandbox writable root: {}",
                            cwd, e
                        );
                    }
                }

                // Include /tmp on Unix unless explicitly excluded.
                if cfg!(unix) && !exclude_slash_tmp {
                    #[allow(clippy::expect_used)]
                    let slash_tmp =
                        AbsolutePathBuf::from_absolute_path("/tmp").expect("/tmp is absolute");
                    if slash_tmp.as_path().is_dir() {
                        roots.push(slash_tmp);
                    }
                }

                // Include $TMPDIR unless explicitly excluded. On macOS, TMPDIR
                // is per-user, so writes to TMPDIR should not be readable by
                // other users on the system.
                //
                // By comparison, TMPDIR is not guaranteed to be defined on
                // Linux or Windows, but supporting it here gives users a way to
                // provide the model with their own temporary directory without
                // having to hardcode it in the config.
                if !exclude_tmpdir_env_var
                    && let Some(tmpdir) = std::env::var_os("TMPDIR")
                    && !tmpdir.is_empty()
                {
                    match AbsolutePathBuf::from_absolute_path(PathBuf::from(&tmpdir)) {
                        Ok(tmpdir_path) => {
                            roots.push(tmpdir_path);
                        }
                        Err(e) => {
                            error!(
                                "Ignoring invalid TMPDIR value {tmpdir:?} for sandbox writable root: {e}",
                            );
                        }
                    }
                }

                // For each root, compute subpaths that should remain read-only.
                roots
                    .into_iter()
                    .map(|writable_root| {
                        let mut subpaths: Vec<AbsolutePathBuf> = Vec::new();
                        #[allow(clippy::expect_used)]
                        let top_level_git = writable_root
                            .join(".git")
                            .expect(".git is a valid relative path");
                        // This applies to typical repos (directory .git), worktrees/submodules
                        // (file .git with gitdir pointer), and bare repos when the gitdir is the
                        // writable root itself.
                        let top_level_git_is_file = top_level_git.as_path().is_file();
                        let top_level_git_is_dir = top_level_git.as_path().is_dir();
                        if top_level_git_is_dir || top_level_git_is_file {
                            if top_level_git_is_file
                                && is_git_pointer_file(&top_level_git)
                                && let Some(gitdir) = resolve_gitdir_from_file(&top_level_git)
                                && !subpaths
                                    .iter()
                                    .any(|subpath| subpath.as_path() == gitdir.as_path())
                            {
                                subpaths.push(gitdir);
                            }
                            subpaths.push(top_level_git);
                        }
feat: if .codex is a sub-folder of a writable root, then make it read-only to the sandbox (#8088)
In preparation for in-repo configuration support, this updates
`WritableRoot::get_writable_roots_with_cwd()` to include the `.codex`
subfolder in `WritableRoot.read_only_subpaths`, if it exists, as we
already do for `.git`.
As noted, currently, like `.git`, `.codex` will only be read-only under
macOS Seatbelt, but we plan to bring support to other OSes, as well.
Updated the integration test in `seatbelt.rs` so that it actually
attempts to run the generated Seatbelt commands, verifying that:
- trying to write to `.codex/config.toml` in a writable root fails
- trying to write to `.git/hooks/pre-commit` in a writable root fails
- trying to write to the writable root containing the `.codex` and
`.git` subfolders succeeds
2025-12-15 22:54:43 -08:00
                        #[allow(clippy::expect_used)]
                        let top_level_codex = writable_root
                            .join(".codex")
                            .expect(".codex is a valid relative path");
                        if top_level_codex.as_path().is_dir() {
                            subpaths.push(top_level_codex);
                        }
                        WritableRoot {
                            root: writable_root,
                            read_only_subpaths: subpaths,
                        }
                    })
                    .collect()
            }
        }
    }
}
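The default-roots portion of `get_writable_roots_with_cwd` can be distilled into a pure function: start from the configured roots, append `cwd`, then optionally `/tmp` and the value of `TMPDIR`. The sketch below is stdlib-only and deterministic (`TMPDIR` is passed in rather than read from the environment); it also omits the real code's `is_dir()` check on `/tmp` and uses plain `PathBuf` instead of `AbsolutePathBuf`, so the names and signature are illustrative:

```rust
use std::path::PathBuf;

/// Simplified sketch of the writable-roots defaulting logic above.
fn default_writable_roots(
    configured: &[&str],
    cwd: &str,
    exclude_slash_tmp: bool,
    tmpdir: Option<&str>,
) -> Vec<PathBuf> {
    // Start from explicitly configured writable roots.
    let mut roots: Vec<PathBuf> = configured.iter().map(|s| PathBuf::from(*s)).collect();
    // cwd is always writable.
    roots.push(PathBuf::from(cwd));
    // /tmp is writable on Unix unless explicitly excluded.
    if cfg!(unix) && !exclude_slash_tmp {
        roots.push(PathBuf::from("/tmp"));
    }
    // A non-empty TMPDIR is also honored.
    if let Some(t) = tmpdir {
        if !t.is_empty() {
            roots.push(PathBuf::from(t));
        }
    }
    roots
}

fn main() {
    let roots = default_writable_roots(&["/repo/out"], "/repo", true, None);
    assert_eq!(roots, vec![PathBuf::from("/repo/out"), PathBuf::from("/repo")]);
    println!("ok");
}
```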

fn is_git_pointer_file(path: &AbsolutePathBuf) -> bool {
    path.as_path().is_file() && path.as_path().file_name() == Some(OsStr::new(".git"))
}

fn resolve_gitdir_from_file(dot_git: &AbsolutePathBuf) -> Option<AbsolutePathBuf> {
    let contents = match std::fs::read_to_string(dot_git.as_path()) {
        Ok(contents) => contents,
        Err(err) => {
            error!(
                "Failed to read {path} for gitdir pointer: {err}",
                path = dot_git.as_path().display()
            );
            return None;
        }
    };

    let trimmed = contents.trim();
    let (_, gitdir_raw) = match trimmed.split_once(':') {
        Some(parts) => parts,
        None => {
            error!(
                "Expected {path} to contain a gitdir pointer, but it did not match `gitdir: <path>`.",
                path = dot_git.as_path().display()
            );
            return None;
        }
    };
    let gitdir_raw = gitdir_raw.trim();
    if gitdir_raw.is_empty() {
        error!(
            "Expected {path} to contain a gitdir pointer, but it was empty.",
            path = dot_git.as_path().display()
        );
        return None;
    }
    let base = match dot_git.as_path().parent() {
        Some(base) => base,
        None => {
            error!(
                "Unable to resolve parent directory for {path}.",
                path = dot_git.as_path().display()
            );
            return None;
        }
    };
    let gitdir_path = match AbsolutePathBuf::resolve_path_against_base(gitdir_raw, base) {
        Ok(path) => path,
        Err(err) => {
            error!(
                "Failed to resolve gitdir path {gitdir_raw} from {path}: {err}",
                path = dot_git.as_path().display()
            );
            return None;
        }
    };
    if !gitdir_path.as_path().exists() {
        error!(
            "Resolved gitdir path {path} does not exist.",
            path = gitdir_path.as_path().display()
        );
        return None;
    }
    Some(gitdir_path)
}
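The `gitdir:` pointer format handled above is what `git worktree` and submodules write into a `.git` *file*: a single `gitdir: <path>` line, where a relative path is resolved against the directory containing the file. A pure, I/O-free sketch of that parsing (slightly stricter than `resolve_gitdir_from_file`, since it checks the `gitdir` key and skips the existence check; names are illustrative):

```rust
use std::path::{Path, PathBuf};

/// Parse the contents of a `.git` pointer file and resolve the gitdir path
/// against `base`, the directory that contains the `.git` file.
fn parse_gitdir_pointer(contents: &str, base: &Path) -> Option<PathBuf> {
    let (key, raw) = contents.trim().split_once(':')?;
    if key != "gitdir" {
        return None;
    }
    let raw = raw.trim();
    if raw.is_empty() {
        return None;
    }
    let path = Path::new(raw);
    // Relative pointers are resolved against the directory holding `.git`.
    Some(if path.is_absolute() {
        path.to_path_buf()
    } else {
        base.join(path)
    })
}

fn main() {
    let got = parse_gitdir_pointer("gitdir: ../.git/worktrees/wt\n", Path::new("/repo/wt"));
    assert_eq!(got, Some(PathBuf::from("/repo/wt/../.git/worktrees/wt")));
    assert_eq!(parse_gitdir_pointer("not a pointer", Path::new("/")), None);
    println!("ok");
}
```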

/// Event Queue Entry - events from agent
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Event {
    /// Submission `id` that this event is correlated with.
    pub id: String,
    /// Payload
    pub msg: EventMsg,
}

/// Response event from the agent
/// NOTE: Make sure none of these values have optional types, as it will mess up the extension code-gen.
#[derive(Debug, Clone, Deserialize, Serialize, Display, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
#[ts(tag = "type")]
#[strum(serialize_all = "snake_case")]
pub enum EventMsg {
    /// Error while executing a submission
    Error(ErrorEvent),

    /// Warning issued while processing a submission. Unlike `Error`, this
    /// indicates the turn continued but the user should still be notified.
    Warning(WarningEvent),

    /// Conversation history was compacted (either automatically or manually).
    ContextCompacted(ContextCompactedEvent),

    /// Conversation history was rolled back by dropping the last N user turns.
    ThreadRolledBack(ThreadRolledBackEvent),

    /// Agent has started a turn.
    /// v1 wire format uses `task_started`; accept `turn_started` for v2 interop.
    #[serde(rename = "task_started", alias = "turn_started")]
    TurnStarted(TurnStartedEvent),

    /// Agent has completed all actions.
    /// v1 wire format uses `task_complete`; accept `turn_complete` for v2 interop.
    #[serde(rename = "task_complete", alias = "turn_complete")]
    TurnComplete(TurnCompleteEvent),

    /// Usage update for the current session, including totals and last turn.
    /// Optional means unknown — UIs should not display when `None`.
    TokenCount(TokenCountEvent),
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
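The "percentage of available context window remaining" computation described above is simple saturating arithmetic on the usage totals. A small sketch (the function name and signature are illustrative, not the actual TUI code):

```rust
/// Given cumulative tokens used and the model's context window, report the
/// percentage of the window still available, saturating at 0 when usage
/// exceeds the window.
fn percent_context_remaining(tokens_used: u64, context_window: u64) -> u8 {
    if context_window == 0 {
        // Unknown window: report nothing remaining rather than dividing by zero.
        return 0;
    }
    let remaining = context_window.saturating_sub(tokens_used);
    ((remaining * 100) / context_window) as u8
}

fn main() {
    assert_eq!(percent_context_remaining(50_000, 200_000), 75);
    assert_eq!(percent_context_remaining(250_000, 200_000), 0);
    println!("ok");
}
```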
|
|
|
|
/// Agent text output message
AgentMessage(AgentMessageEvent),
/// User/system input message (what was sent to the model)
UserMessage(UserMessageEvent),
/// Agent text output delta message
AgentMessageDelta(AgentMessageDeltaEvent),
/// Reasoning event from agent.
AgentReasoning(AgentReasoningEvent),
/// Agent reasoning delta event from agent.
AgentReasoningDelta(AgentReasoningDeltaEvent),
/// Raw chain-of-thought from agent.
AgentReasoningRawContent(AgentReasoningRawContentEvent),
/// Agent reasoning content delta event from agent.
AgentReasoningRawContentDelta(AgentReasoningRawContentDeltaEvent),
/// Signaled when the model begins a new reasoning summary section (e.g., a new titled block).
AgentReasoningSectionBreak(AgentReasoningSectionBreakEvent),
/// Ack the client's configure message.
SessionConfigured(SessionConfiguredEvent),
/// Incremental MCP startup progress updates.
McpStartupUpdate(McpStartupUpdateEvent),
/// Aggregate MCP startup completion summary.
McpStartupComplete(McpStartupCompleteEvent),
McpToolCallBegin(McpToolCallBeginEvent),
feat: support mcp_servers in config.toml (#829)
This adds initial support for MCP servers in the style of Claude Desktop
and Cursor. Note this PR is the bare minimum to get things working end
to end: all configured MCP servers are launched every time Codex is run,
there is no recovery for MCP servers that crash, etc.
(Also, I took some shortcuts to change some fields of `Session` to be
`pub(crate)`, which also means there are circular deps between
`codex.rs` and `mcp_tool_call.rs`, but I will clean that up in a
subsequent PR.)
`codex-rs/README.md` is updated as part of this PR to explain how to use
this feature. There is a bit of plumbing to route the new settings from
`Config` to the business logic in `codex.rs`. The most significant
chunks for new code are in `mcp_connection_manager.rs` (which defines
the `McpConnectionManager` struct) and `mcp_tool_call.rs`, which is
responsible for tool calls.
This PR also introduces new `McpToolCallBegin` and `McpToolCallEnd`
event types to the protocol, but does not add any handlers for them.
(See https://github.com/openai/codex/pull/836 for initial usage.)
To test, I added the following to my `~/.codex/config.toml`:
```toml
# Local build of https://github.com/hideya/mcp-server-weather-js
[mcp_servers.weather]
command = "/Users/mbolin/code/mcp-server-weather-js/dist/index.js"
args = []
```
And then I ran the following:
```
codex-rs$ cargo run --bin codex exec 'what is the weather in san francisco'
[2025-05-06T22:40:05] Task started: 1
[2025-05-06T22:40:18] Agent message: Here’s the latest National Weather Service forecast for San Francisco (downtown, near 37.77° N, 122.42° W):
This Afternoon (Tue):
• Sunny, high near 69 °F
• West-southwest wind around 12 mph
Tonight:
• Partly cloudy, low around 52 °F
• SW wind 7–10 mph
...
```
Note that Codex itself is not able to make network calls, so it would
not normally be able to get live weather information like this. However,
the weather MCP is [currently] not run under the Codex sandbox, so it is
able to hit `api.weather.gov` and fetch current weather information.
---
[//]: # (BEGIN SAPLING FOOTER)
Stack created with [Sapling](https://sapling-scm.com). Best reviewed
with [ReviewStack](https://reviewstack.dev/openai/codex/pull/829).
* #836
* __->__ #829
2025-05-06 15:47:59 -07:00
McpToolCallEnd(McpToolCallEndEvent),
WebSearchBegin(WebSearchBeginEvent),
WebSearchEnd(WebSearchEndEvent),
/// Notification that the server is about to execute a command.
ExecCommandBegin(ExecCommandBeginEvent),
/// Incremental chunk of output from a running command.
ExecCommandOutputDelta(ExecCommandOutputDeltaEvent),
/// Terminal interaction for an in-progress command (stdin sent and stdout observed).
TerminalInteraction(TerminalInteractionEvent),
ExecCommandEnd(ExecCommandEndEvent),
/// Notification that the agent attached a local image via the view_image tool.
ViewImageToolCall(ViewImageToolCallEvent),
ExecApprovalRequest(ExecApprovalRequestEvent),
Feat: request user input tool (#9472)
### Summary
* Add `requestUserInput` tool that the model can use to gather
feedback or ask questions mid-turn.
### Tool input schema
```
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "requestUserInput input",
"type": "object",
"additionalProperties": false,
"required": ["questions"],
"properties": {
"questions": {
"type": "array",
"description": "Questions to show the user (1-3). Prefer 1 unless multiple independent decisions block progress.",
"minItems": 1,
"maxItems": 3,
"items": {
"type": "object",
"additionalProperties": false,
"required": ["id", "header", "question"],
"properties": {
"id": {
"type": "string",
"description": "Stable identifier for mapping answers (snake_case)."
},
"header": {
"type": "string",
"description": "Short header label shown in the UI (12 or fewer chars)."
},
"question": {
"type": "string",
"description": "Single-sentence prompt shown to the user."
},
"options": {
"type": "array",
"description": "Optional 2-3 mutually exclusive choices. Put the recommended option first and suffix its label with \"(Recommended)\". Only include \"Other\" option if we want to include a free form option. If the question is free form in nature, do not include any option.",
"minItems": 2,
"maxItems": 3,
"items": {
"type": "object",
"additionalProperties": false,
"required": ["value", "label", "description"],
"properties": {
"value": {
"type": "string",
"description": "Machine-readable value (snake_case)."
},
"label": {
"type": "string",
"description": "User-facing label (1-5 words)."
},
"description": {
"type": "string",
"description": "One short sentence explaining impact/tradeoff if selected."
}
}
}
}
}
}
}
}
}
```
### Tool output schema
```
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "requestUserInput output",
"type": "object",
"additionalProperties": false,
"required": ["answers"],
"properties": {
"answers": {
"type": "object",
"description": "Map of question id to user answer.",
"additionalProperties": {
"type": "object",
"additionalProperties": false,
"required": ["selected"],
"properties": {
"selected": {
"type": "array",
"items": { "type": "string" }
},
"other": {
"type": ["string", "null"]
}
}
}
}
}
}
```
2026-01-19 10:17:30 -08:00
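The cardinality rules stated in the input schema (1-3 questions, 2-3 options when options are present) can be enforced with a small validator; a hypothetical sketch, with types that mirror the schema but are not the actual protocol structs:

```rust
/// Hypothetical mirror of one entry in the `requestUserInput` input schema
/// above; an empty `options` list models a free-form question.
struct Question {
    id: String,
    header: String,
    question: String,
    /// (value, label, description) triples for each choice.
    options: Vec<(String, String, String)>,
}

/// Enforce the schema's cardinality rules: 1-3 questions, and 2-3
/// mutually exclusive options whenever options are supplied.
fn validate(questions: &[Question]) -> Result<(), String> {
    if questions.is_empty() || questions.len() > 3 {
        return Err(format!("expected 1-3 questions, got {}", questions.len()));
    }
    for q in questions {
        if !q.options.is_empty() && !(2..=3).contains(&q.options.len()) {
            return Err(format!("question `{}`: expected 2-3 options", q.id));
        }
    }
    Ok(())
}
```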
RequestUserInput(RequestUserInputEvent),
DynamicToolCallRequest(DynamicToolCallRequest),
ElicitationRequest(ElicitationRequestEvent),
ApplyPatchApprovalRequest(ApplyPatchApprovalRequestEvent),
/// Notification advising the user that something they are using has been
/// deprecated and should be phased out.
DeprecationNotice(DeprecationNoticeEvent),
BackgroundEvent(BackgroundEventEvent),
UndoStarted(UndoStartedEvent),
UndoCompleted(UndoCompletedEvent),
/// Notification that a model stream experienced an error or disconnect
/// and the system is handling it (e.g., retrying with backoff).
StreamError(StreamErrorEvent),
/// Notification that the agent is about to apply a code patch. Mirrors
/// `ExecCommandBegin` so front‑ends can show progress indicators.
PatchApplyBegin(PatchApplyBeginEvent),
/// Notification that a patch application has finished.
PatchApplyEnd(PatchApplyEndEvent),
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
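The append-with-restricted-permissions part of the scheme can be sketched with the standard library alone; this is a Unix-only illustration, and it omits the advisory file locking that `message_history.rs` uses, so it is not the actual implementation:

```rust
use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;

/// Append one JSON line to a history log, ensuring the file exists and
/// is readable only by its owner (0o600). Real code would also take an
/// advisory lock before writing, since multiple CLI sessions may append
/// concurrently.
fn append_history_line(path: &Path, json_line: &str) -> std::io::Result<()> {
    let mut file = OpenOptions::new().create(true).append(true).open(path)?;
    let mut perms = file.metadata()?.permissions();
    perms.set_mode(0o600);
    file.set_permissions(perms)?;
    writeln!(file, "{json_line}")
}
```

Because the file is opened in append mode, each call adds exactly one line, which is what makes reading backwards from `history_entry_count` give a consistent view.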
|
|
|
|
|
2025-08-04 08:57:04 -07:00
|
|
|
|
TurnDiff(TurnDiffEvent),
|
|
|
|
|
|
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
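The append path can be sketched as follows. This is a minimal std-only sketch: the entry shape and path are illustrative, and the advisory `flock`-style lock that the real `message_history.rs` takes around the write is elided because std does not expose it portably.

```rust
use std::fs::OpenOptions;
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::Path;

/// Append one JSONL entry to the history file (illustrative sketch; the real
/// implementation also holds an advisory file lock around this write).
fn append_history_entry(path: &Path, line: &str) -> std::io::Result<()> {
    let mut file = OpenOptions::new().create(true).append(true).open(path)?;
    // o600: other users on the system cannot read the user's history.
    file.set_permissions(std::fs::Permissions::from_mode(0o600))?;
    writeln!(file, "{line}")
}

fn main() -> std::io::Result<()> {
    let path = std::env::temp_dir().join("history_sketch.jsonl");
    let _ = std::fs::remove_file(&path); // start fresh for the demo
    append_history_entry(&path, r#"{"text":"ls -la"}"#)?;
    append_history_entry(&path, r#"{"text":"git status"}"#)?;
    // Appending never rewrites existing entries, unlike a single JSON array.
    assert_eq!(std::fs::read_to_string(&path)?.lines().count(), 2);
    std::fs::remove_file(&path)
}
```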
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
This admittedly convoluted scheme is designed to defend against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
In the absence of truncation, `history.jsonl` is an append-only log, so
as long as the client reads backwards from `history_entry_count`, it
should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
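The client-side cursor described above can be sketched as a small state machine. Only `history_log_id` and `history_entry_count` come from the protocol in this commit message; the `HistoryCursor` type itself is a hypothetical illustration of walking backwards one entry at a time.

```rust
/// Hypothetical client-side cursor for paging back through on-disk history.
struct HistoryCursor {
    /// Inode of history.jsonl, echoed back in each GetHistoryEntryRequest so
    /// the server can detect truncation/rotation between requests.
    history_log_id: u64,
    /// Number of entries in the file when the session was configured.
    history_entry_count: usize,
    /// How many older entries we have fetched so far.
    fetched: usize,
}

impl HistoryCursor {
    /// Next global offset to request, walking backwards from the end.
    fn next_offset(&mut self) -> Option<usize> {
        if self.fetched >= self.history_entry_count {
            return None;
        }
        self.fetched += 1;
        Some(self.history_entry_count - self.fetched)
    }
}

fn main() {
    let mut cursor = HistoryCursor { history_log_id: 42, history_entry_count: 3, fetched: 0 };
    assert_eq!(cursor.next_offset(), Some(2));
    assert_eq!(cursor.next_offset(), Some(1));
    assert_eq!(cursor.next_offset(), Some(0));
    assert_eq!(cursor.next_offset(), None);
    assert_eq!(cursor.history_log_id, 42);
}
```

Because the log is append-only absent truncation, every offset below `history_entry_count` remains stable even while concurrent sessions append new entries.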
|
|
|
|
/// Response to GetHistoryEntryRequest.
|
|
|
|
|
|
GetHistoryEntryResponse(GetHistoryEntryResponseEvent),
|
2025-07-23 15:03:26 -07:00
|
|
|
|
|
2025-08-19 09:00:31 -07:00
|
|
|
|
/// List of MCP tools available to the agent.
|
|
|
|
|
|
McpListToolsResponse(McpListToolsResponseEvent),
|
|
|
|
|
|
|
2025-08-28 19:16:39 -07:00
|
|
|
|
/// List of custom prompts available to the agent.
|
|
|
|
|
|
ListCustomPromptsResponse(ListCustomPromptsResponseEvent),
|
|
|
|
|
|
|
2025-12-14 09:58:17 -08:00
|
|
|
|
/// List of skills available to the agent.
|
|
|
|
|
|
ListSkillsResponse(ListSkillsResponseEvent),
|
|
|
|
|
|
|
2025-12-17 01:35:49 -08:00
|
|
|
|
/// Notification that skill data may have been updated and clients may want to reload.
|
|
|
|
|
|
SkillsUpdateAvailable,
|
|
|
|
|
|
|
2025-07-29 11:22:02 -07:00
|
|
|
|
PlanUpdate(UpdatePlanArgs),
|
|
|
|
|
|
|
2025-08-17 21:40:31 -07:00
|
|
|
|
TurnAborted(TurnAbortedEvent),
|
|
|
|
|
|
|
2025-07-23 15:03:26 -07:00
|
|
|
|
/// Notification that the agent is shutting down.
|
|
|
|
|
|
ShutdownComplete,
|
2025-08-22 17:06:09 -07:00
|
|
|
|
|
Review Mode (Core) (#3401)
## 📝 Review Mode -- Core
This PR introduces the Core implementation for Review mode:
- New op `Op::Review { prompt: String }`: spawns a child review task
with isolated context, a review‑specific system prompt, and a
`Config.review_model`.
- `EnteredReviewMode`: emitted when the child review session starts.
Every event from this point onwards reflects the review session.
- `ExitedReviewMode(Option<ReviewOutputEvent>)`: emitted when the review
finishes or is interrupted, with optional structured findings:
```json
{
"findings": [
{
"title": "<≤ 80 chars, imperative>",
"body": "<valid Markdown explaining *why* this is a problem; cite files/lines/functions>",
"confidence_score": <float 0.0-1.0>,
"priority": <int 0-3>,
"code_location": {
"absolute_file_path": "<file path>",
"line_range": {"start": <int>, "end": <int>}
}
}
],
"overall_correctness": "patch is correct" | "patch is incorrect",
"overall_explanation": "<1-3 sentence explanation justifying the overall_correctness verdict>",
"overall_confidence_score": <float 0.0-1.0>
}
```
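The findings payload above maps naturally onto plain data types. This is a simplified sketch: the real `ReviewOutputEvent` in codex-rs derives the serde/TS traits used elsewhere in this file and may name things differently.

```rust
// Plain-data sketch of one review finding from the schema above.
#[derive(Debug, Clone, PartialEq)]
struct LineRange { start: u32, end: u32 }

#[derive(Debug, Clone, PartialEq)]
struct CodeLocation { absolute_file_path: String, line_range: LineRange }

#[derive(Debug, Clone, PartialEq)]
struct ReviewFinding {
    title: String,
    body: String,
    confidence_score: f32,
    priority: u8, // 0 = most urgent, 3 = least, per the schema above
    code_location: CodeLocation,
}

fn main() {
    let mut findings = vec![
        ReviewFinding {
            title: "Fix off-by-one in range check".into(),
            body: "Explains *why*, citing files/lines.".into(),
            confidence_score: 0.9,
            priority: 2,
            code_location: CodeLocation {
                absolute_file_path: "/tmp/a.rs".into(),
                line_range: LineRange { start: 10, end: 12 },
            },
        },
        ReviewFinding {
            title: "Handle unwrap on empty input".into(),
            body: "Explains *why*, citing files/lines.".into(),
            confidence_score: 0.7,
            priority: 0,
            code_location: CodeLocation {
                absolute_file_path: "/tmp/b.rs".into(),
                line_range: LineRange { start: 3, end: 3 },
            },
        },
    ];
    // A client would surface the most urgent findings first.
    findings.sort_by_key(|f| f.priority);
    assert_eq!(findings[0].title, "Handle unwrap on empty input");
}
```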
## Questions
### Why separate out its own message history?
We want the review thread to match the training of our review models as
much as possible -- that means using a custom prompt, removing user
instructions, and starting a clean chat history.
We also want to make sure the review thread doesn't leak into the parent
thread.
### Why do this as a mode, vs. sub-agents?
1. We want review to be a synchronous task, so it's fine for now to do a
bespoke implementation.
2. We're still unclear about the final structure for sub-agents. We'd
prefer to land this quickly and then refactor into sub-agents without
rushing that implementation.
2025-09-12 16:25:10 -07:00
|
|
|
|
/// Entered review mode.
|
|
|
|
|
|
EnteredReviewMode(ReviewRequest),
|
|
|
|
|
|
|
|
|
|
|
|
/// Exited review mode with an optional final result to apply.
|
2025-09-14 17:34:33 -07:00
|
|
|
|
ExitedReviewMode(ExitedReviewModeEvent),
|
2025-10-20 13:34:44 -07:00
|
|
|
|
|
2025-10-29 13:32:40 -07:00
|
|
|
|
RawResponseItem(RawResponseItemEvent),
|
2025-10-24 15:41:52 -07:00
|
|
|
|
|
2025-10-20 13:34:44 -07:00
|
|
|
|
ItemStarted(ItemStartedEvent),
|
|
|
|
|
|
ItemCompleted(ItemCompletedEvent),
|
2025-10-29 15:33:57 -07:00
|
|
|
|
|
|
|
|
|
|
AgentMessageContentDelta(AgentMessageContentDeltaEvent),
|
|
|
|
|
|
ReasoningContentDelta(ReasoningContentDeltaEvent),
|
|
|
|
|
|
ReasoningRawContentDelta(ReasoningRawContentDeltaEvent),
|
feat: emit events around collab tools (#9095)
Emit the following events around the collab tools. On the `app-server`
this will be under `item/started` and `item/completed`
```
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentSpawnBeginEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Initial prompt sent to the agent. Can be empty to prevent CoT leaking at the
/// beginning.
pub prompt: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentSpawnEndEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the newly spawned agent, if it was created.
pub new_thread_id: Option<ThreadId>,
/// Initial prompt sent to the agent. Can be empty to prevent CoT leaking at the
/// beginning.
pub prompt: String,
/// Last known status of the new agent reported to the sender agent.
pub status: AgentStatus,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentInteractionBeginEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// Prompt sent from the sender to the receiver. Can be empty to prevent CoT
/// leaking at the beginning.
pub prompt: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentInteractionEndEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// Prompt sent from the sender to the receiver. Can be empty to prevent CoT
/// leaking at the beginning.
pub prompt: String,
/// Last known status of the receiver agent reported to the sender agent.
pub status: AgentStatus,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabWaitingBeginEvent {
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// ID of the waiting call.
pub call_id: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabWaitingEndEvent {
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// ID of the waiting call.
pub call_id: String,
/// Last known status of the receiver agent reported to the sender agent.
pub status: AgentStatus,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabCloseBeginEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabCloseEndEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// Last known status of the receiver agent reported to the sender agent before
/// the close.
pub status: AgentStatus,
}
```
2026-01-14 17:55:57 +00:00
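Each collab tool call is reported as a begin/end pair that clients correlate by `call_id`. The sketch below illustrates that pairing for a spawn; `ThreadId` and `AgentStatus` are stubbed with simplified stand-ins so the example is self-contained (the real types live elsewhere in this file).

```rust
// Simplified stand-ins for types defined elsewhere in protocol.rs.
#[derive(Debug, Clone, PartialEq)]
struct ThreadId(String);

#[derive(Debug, Clone, PartialEq)]
enum AgentStatus { PendingInit, Running }

struct CollabAgentSpawnBeginEvent {
    call_id: String,
    sender_thread_id: ThreadId,
    prompt: String,
}

struct CollabAgentSpawnEndEvent {
    call_id: String,
    sender_thread_id: ThreadId,
    new_thread_id: Option<ThreadId>,
    prompt: String,
    status: AgentStatus,
}

fn main() {
    let begin = CollabAgentSpawnBeginEvent {
        call_id: "call-1".into(),
        sender_thread_id: ThreadId("t-parent".into()),
        prompt: String::new(), // may be empty to avoid leaking CoT up front
    };
    let end = CollabAgentSpawnEndEvent {
        call_id: begin.call_id.clone(),
        sender_thread_id: begin.sender_thread_id.clone(),
        new_thread_id: Some(ThreadId("t-child".into())),
        prompt: begin.prompt.clone(),
        status: AgentStatus::Running,
    };
    // Clients match the two halves of the tool call via call_id.
    assert_eq!(begin.call_id, end.call_id);
    assert_eq!(end.status, AgentStatus::Running);
    assert_ne!(end.status, AgentStatus::PendingInit);
}
```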
|
|
|
|
|
|
|
|
|
|
/// Collab interaction: agent spawn begin.
|
|
|
|
|
|
CollabAgentSpawnBegin(CollabAgentSpawnBeginEvent),
|
|
|
|
|
|
/// Collab interaction: agent spawn end.
|
|
|
|
|
|
CollabAgentSpawnEnd(CollabAgentSpawnEndEvent),
|
|
|
|
|
|
/// Collab interaction: agent interaction begin.
|
|
|
|
|
|
CollabAgentInteractionBegin(CollabAgentInteractionBeginEvent),
|
|
|
|
|
|
/// Collab interaction: agent interaction end.
|
|
|
|
|
|
CollabAgentInteractionEnd(CollabAgentInteractionEndEvent),
|
|
|
|
|
|
/// Collab interaction: waiting begin.
|
|
|
|
|
|
CollabWaitingBegin(CollabWaitingBeginEvent),
|
|
|
|
|
|
/// Collab interaction: waiting end.
|
|
|
|
|
|
CollabWaitingEnd(CollabWaitingEndEvent),
|
|
|
|
|
|
/// Collab interaction: close begin.
|
|
|
|
|
|
CollabCloseBegin(CollabCloseBeginEvent),
|
|
|
|
|
|
/// Collab interaction: close end.
|
|
|
|
|
|
CollabCloseEnd(CollabCloseEndEvent),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabAgentSpawnBeginEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabAgentSpawnBeginEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabAgentSpawnBegin(event)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabAgentSpawnEndEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabAgentSpawnEndEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabAgentSpawnEnd(event)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabAgentInteractionBeginEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabAgentInteractionBeginEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabAgentInteractionBegin(event)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabAgentInteractionEndEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabAgentInteractionEndEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabAgentInteractionEnd(event)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabWaitingBeginEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabWaitingBeginEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabWaitingBegin(event)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabWaitingEndEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabWaitingEndEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabWaitingEnd(event)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabCloseBeginEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabCloseBeginEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabCloseBegin(event)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl From<CollabCloseEndEvent> for EventMsg {
|
|
|
|
|
|
fn from(event: CollabCloseEndEvent) -> Self {
|
|
|
|
|
|
EventMsg::CollabCloseEnd(event)
|
|
|
|
|
|
}
|
2025-10-20 13:34:44 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-06 19:44:39 +00:00
|
|
|
|
/// Agent lifecycle status, derived from emitted events.
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS, Default)]
|
|
|
|
|
|
#[serde(rename_all = "snake_case")]
|
|
|
|
|
|
#[ts(rename_all = "snake_case")]
|
|
|
|
|
|
pub enum AgentStatus {
|
|
|
|
|
|
/// Agent is waiting for initialization.
|
|
|
|
|
|
#[default]
|
|
|
|
|
|
PendingInit,
|
|
|
|
|
|
/// Agent is currently running.
|
|
|
|
|
|
Running,
|
|
|
|
|
|
/// Agent is done. Contains the final assistant message.
|
|
|
|
|
|
Completed(Option<String>),
|
|
|
|
|
|
/// Agent encountered an error.
|
|
|
|
|
|
Errored(String),
|
2026-01-12 12:16:24 +00:00
|
|
|
|
/// Agent has been shut down.
|
2026-01-06 19:44:39 +00:00
|
|
|
|
Shutdown,
|
|
|
|
|
|
/// Agent is not found.
|
|
|
|
|
|
NotFound,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
[app-server & core] introduce new codex error code and v2 app-server error events (#6938)
This PR does two things:
1. populate a new `codex_error_code` protocol in error events sent from
core to client;
2. old v1 core events `codex/event/stream_error` and `codex/event/error`
will now both become `error`. We also show the codex error code for the
turn/completed -> error status.
New events in the app-server test:
```
< {
< "method": "codex/event/stream_error",
< "params": {
< "conversationId": "019aa34c-0c14-70e0-9706-98520a760d67",
< "id": "0",
< "msg": {
< "codex_error_code": {
< "response_stream_disconnected": {
< "http_status_code": 401
< }
< },
< "message": "Reconnecting... 2/5",
< "type": "stream_error"
< }
< }
< }
{
< "method": "error",
< "params": {
< "error": {
< "codexErrorCode": {
< "responseStreamDisconnected": {
< "httpStatusCode": 401
< }
< },
< "message": "Reconnecting... 2/5"
< }
< }
< }
< {
< "method": "turn/completed",
< "params": {
< "turn": {
< "error": {
< "codexErrorCode": {
< "responseTooManyFailedAttempts": {
< "httpStatusCode": 401
< }
< },
< "message": "exceeded retry limit, last status: 401 Unauthorized, request id: 9a1b495a1a97ed3e-SJC"
< },
< "id": "0",
< "items": [],
< "status": "failed"
< }
< }
< }
```
2025-11-20 15:06:55 -08:00
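A client might coarsely bucket an HTTP status into these error variants as below. The variant names mirror `CodexErrorInfo` as defined in this file, but this particular mapping is an illustration, not the one core uses.

```rust
// Subset of CodexErrorInfo, stubbed locally so the sketch is self-contained.
#[derive(Debug, PartialEq)]
enum CodexErrorInfo {
    Unauthorized,
    BadRequest,
    InternalServerError,
    ResponseStreamDisconnected { http_status_code: Option<u16> },
}

/// Illustrative classification of an optional HTTP status code.
fn classify(http_status_code: Option<u16>) -> CodexErrorInfo {
    match http_status_code {
        Some(401) => CodexErrorInfo::Unauthorized,
        Some(400) => CodexErrorInfo::BadRequest,
        Some(s) if (500..600).contains(&s) => CodexErrorInfo::InternalServerError,
        other => CodexErrorInfo::ResponseStreamDisconnected { http_status_code: other },
    }
}

fn main() {
    assert_eq!(classify(Some(401)), CodexErrorInfo::Unauthorized);
    assert_eq!(classify(Some(503)), CodexErrorInfo::InternalServerError);
    assert_eq!(
        classify(Some(429)),
        CodexErrorInfo::ResponseStreamDisconnected { http_status_code: Some(429) }
    );
}
```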
|
|
|
|
/// Codex errors that we expose to clients.
|
|
|
|
|
|
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema, TS)]
|
|
|
|
|
|
#[serde(rename_all = "snake_case")]
|
|
|
|
|
|
#[ts(rename_all = "snake_case")]
|
|
|
|
|
|
pub enum CodexErrorInfo {
|
|
|
|
|
|
ContextWindowExceeded,
|
|
|
|
|
|
UsageLimitExceeded,
|
|
|
|
|
|
HttpConnectionFailed {
|
|
|
|
|
|
http_status_code: Option<u16>,
|
|
|
|
|
|
},
|
|
|
|
|
|
/// Failed to connect to the response SSE stream.
|
|
|
|
|
|
ResponseStreamConnectionFailed {
|
|
|
|
|
|
http_status_code: Option<u16>,
|
|
|
|
|
|
},
|
|
|
|
|
|
InternalServerError,
|
|
|
|
|
|
Unauthorized,
|
|
|
|
|
|
BadRequest,
|
|
|
|
|
|
SandboxError,
|
|
|
|
|
|
/// The response SSE stream disconnected in the middle of a turn, before completion.
|
|
|
|
|
|
ResponseStreamDisconnected {
|
|
|
|
|
|
http_status_code: Option<u16>,
|
|
|
|
|
|
},
|
|
|
|
|
|
/// Reached the retry limit for responses.
|
|
|
|
|
|
ResponseTooManyFailedAttempts {
|
|
|
|
|
|
http_status_code: Option<u16>,
|
|
|
|
|
|
},
|
2026-01-06 13:23:48 -08:00
|
|
|
|
ThreadRollbackFailed,
|
|
|
|
|
Other,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-29 13:32:40 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
|
|
|
|
|
pub struct RawResponseItemEvent {
|
|
|
|
|
|
pub item: ResponseItem,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 13:34:44 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
|
|
|
|
|
pub struct ItemStartedEvent {
|
2026-01-07 17:04:53 +00:00
|
|
|
|
pub thread_id: ThreadId,
|
2025-10-20 13:34:44 -07:00
|
|
|
|
pub turn_id: String,
|
|
|
|
|
|
pub item: TurnItem,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-29 15:33:57 -07:00
|
|
|
|
impl HasLegacyEvent for ItemStartedEvent {
|
|
|
|
|
|
fn as_legacy_events(&self, _: bool) -> Vec<EventMsg> {
|
|
|
|
|
|
match &self.item {
|
|
|
|
|
|
TurnItem::WebSearch(item) => vec![EventMsg::WebSearchBegin(WebSearchBeginEvent {
|
|
|
|
|
|
call_id: item.id.clone(),
|
|
|
|
|
|
})],
|
|
|
|
|
|
_ => Vec::new(),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 13:34:44 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
|
|
|
|
|
pub struct ItemCompletedEvent {
|
2026-01-07 17:04:53 +00:00
|
|
|
|
pub thread_id: ThreadId,
|
2025-10-20 13:34:44 -07:00
|
|
|
|
pub turn_id: String,
|
|
|
|
|
|
pub item: TurnItem,
|
2025-09-14 17:34:33 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-29 15:33:57 -07:00
|
|
|
|
pub trait HasLegacyEvent {
|
|
|
|
|
|
fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg>;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl HasLegacyEvent for ItemCompletedEvent {
|
|
|
|
|
|
fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg> {
|
|
|
|
|
|
self.item.as_legacy_events(show_raw_agent_reasoning)
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
|
|
|
|
|
pub struct AgentMessageContentDeltaEvent {
|
|
|
|
|
|
pub thread_id: String,
|
|
|
|
|
|
pub turn_id: String,
|
|
|
|
|
|
pub item_id: String,
|
|
|
|
|
|
pub delta: String,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl HasLegacyEvent for AgentMessageContentDeltaEvent {
|
|
|
|
|
|
fn as_legacy_events(&self, _: bool) -> Vec<EventMsg> {
|
|
|
|
|
|
vec![EventMsg::AgentMessageDelta(AgentMessageDeltaEvent {
|
|
|
|
|
|
delta: self.delta.clone(),
|
|
|
|
|
|
})]
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
|
|
|
|
|
pub struct ReasoningContentDeltaEvent {
|
|
|
|
|
|
pub thread_id: String,
|
|
|
|
|
|
pub turn_id: String,
|
|
|
|
|
|
pub item_id: String,
|
|
|
|
|
|
pub delta: String,
|
2025-11-13 16:25:01 -08:00
|
|
|
|
// Load with a default value so it stays backward compatible with the old format.
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub summary_index: i64,
|
2025-10-29 15:33:57 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl HasLegacyEvent for ReasoningContentDeltaEvent {
|
|
|
|
|
|
fn as_legacy_events(&self, _: bool) -> Vec<EventMsg> {
|
|
|
|
|
|
vec![EventMsg::AgentReasoningDelta(AgentReasoningDeltaEvent {
|
|
|
|
|
|
delta: self.delta.clone(),
|
|
|
|
|
|
})]
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, TS, JsonSchema)]
|
|
|
|
|
|
pub struct ReasoningRawContentDeltaEvent {
|
|
|
|
|
|
pub thread_id: String,
|
|
|
|
|
|
pub turn_id: String,
|
|
|
|
|
|
pub item_id: String,
|
|
|
|
|
|
pub delta: String,
|
2025-11-13 16:25:01 -08:00
|
|
|
|
// Load with a default value so it stays backward compatible with the old format.
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub content_index: i64,
|
2025-10-29 15:33:57 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl HasLegacyEvent for ReasoningRawContentDeltaEvent {
|
|
|
|
|
|
fn as_legacy_events(&self, _: bool) -> Vec<EventMsg> {
|
|
|
|
|
|
vec![EventMsg::AgentReasoningRawContentDelta(
|
|
|
|
|
|
AgentReasoningRawContentDeltaEvent {
|
|
|
|
|
|
delta: self.delta.clone(),
|
|
|
|
|
|
},
|
|
|
|
|
|
)]
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl HasLegacyEvent for EventMsg {
|
|
|
|
|
|
fn as_legacy_events(&self, show_raw_agent_reasoning: bool) -> Vec<EventMsg> {
|
|
|
|
|
|
match self {
|
2026-01-26 19:33:48 -08:00
|
|
|
|
EventMsg::ItemStarted(event) => event.as_legacy_events(show_raw_agent_reasoning),
|
2025-10-29 15:33:57 -07:00
|
|
|
|
EventMsg::ItemCompleted(event) => event.as_legacy_events(show_raw_agent_reasoning),
|
|
|
|
|
|
EventMsg::AgentMessageContentDelta(event) => {
|
|
|
|
|
|
event.as_legacy_events(show_raw_agent_reasoning)
|
|
|
|
|
|
}
|
|
|
|
|
|
EventMsg::ReasoningContentDelta(event) => {
|
|
|
|
|
|
event.as_legacy_events(show_raw_agent_reasoning)
|
|
|
|
|
|
}
|
|
|
|
|
|
EventMsg::ReasoningRawContentDelta(event) => {
|
|
|
|
|
|
event.as_legacy_events(show_raw_agent_reasoning)
|
|
|
|
|
|
}
|
|
|
|
|
|
_ => Vec::new(),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-09-14 17:34:33 -07:00
|
|
|
|
pub struct ExitedReviewModeEvent {
|
|
|
|
|
|
pub review_output: Option<ReviewOutputEvent>,
|
2025-05-13 20:44:42 -07:00
|
|
|
|
}
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
As stated in `codex-rs/README.md`:
Today, Codex CLI is written in TypeScript and requires Node.js 22+ to
run it. For a number of users, this runtime requirement inhibits
adoption: they would be better served by a standalone executable. As
maintainers, we want Codex to run efficiently in a wide range of
environments with minimal overhead. We also want to take advantage of
operating system-specific APIs to provide better sandboxing, where
possible.
To that end, we are moving forward with a Rust implementation of Codex
CLI contained in this folder, which has the following benefits:
- The CLI compiles to small, standalone, platform-specific binaries.
- Can make direct, native calls to
[seccomp](https://man7.org/linux/man-pages/man2/seccomp.2.html) and
[landlock](https://man7.org/linux/man-pages/man7/landlock.7.html) in
order to support sandboxing on Linux.
- No runtime garbage collection, resulting in lower memory consumption
and better, more predictable performance.
Currently, the Rust implementation is materially behind the TypeScript
implementation in functionality, so continue to use the TypeScript
implementation for the time being. We will publish native executables via
GitHub Releases as soon as we feel the Rust version is usable.
2025-04-24 13:31:40 -07:00
|
|
|
|
|
2025-05-13 20:44:42 -07:00
|
|
|
|
// Individual event payload types matching each `EventMsg` variant.
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-05-13 20:44:42 -07:00
|
|
|
|
pub struct ErrorEvent {
|
|
|
|
|
|
pub message: String,
|
|
|
|
|
#[serde(default)]
|
2025-11-20 17:02:37 -08:00
|
|
|
|
pub codex_error_info: Option<CodexErrorInfo>,
|
2025-05-13 20:44:42 -07:00
|
|
|
|
}
|
|
|
|
|
|
2025-10-31 13:27:33 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct WarningEvent {
|
|
|
|
|
|
pub message: String,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-25 16:12:14 +00:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct ContextCompactedEvent;
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2026-01-09 17:31:17 +00:00
|
|
|
|
pub struct TurnCompleteEvent {
|
2025-05-19 16:08:18 -07:00
|
|
|
|
pub last_agent_message: Option<String>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2026-01-09 17:31:17 +00:00
|
|
|
|
pub struct TurnStartedEvent {
|
2026-01-07 10:35:09 -08:00
|
|
|
|
// TODO(aibrahim): make this not optional
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub model_context_window: Option<i64>,
|
2025-08-27 00:04:21 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-02 15:20:20 -08:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, Default, PartialEq, Eq, JsonSchema, TS)]
|
feat: show number of tokens remaining in UI (#1388)
When using the OpenAI Responses API, we now record the `usage` field for
a `"response.completed"` event, which includes metrics about the number
of tokens consumed. We also introduce `openai_model_info.rs`, which
includes current data about the most common OpenAI models available via
the API (specifically `context_window` and `max_output_tokens`). If
Codex does not recognize the model, you can set `model_context_window`
and `model_max_output_tokens` explicitly in `config.toml`.
We then introduce a new event type to `protocol.rs`, `TokenCount`,
which includes the `TokenUsage` for the most recent turn.
Finally, we update the TUI to record the running sum of tokens used so
the percentage of available context window remaining can be reported via
the placeholder text for the composer:

We could certainly get much fancier with this (such as reporting the
estimated cost of the conversation), but for now, we are just trying to
achieve feature parity with the TypeScript CLI.
Though arguably this improves upon the TypeScript CLI, as the TypeScript
CLI uses heuristics to estimate the number of tokens used rather than
using the `usage` information directly:
https://github.com/openai/codex/blob/296996d74e345b1b05d8c3451a06ace21c5ada96/codex-cli/src/utils/approximate-tokens-used.ts#L3-L16
Fixes https://github.com/openai/codex/issues/1242
2025-06-25 23:31:11 -07:00
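The "percentage of available context window remaining" shown in the composer placeholder can be computed from these fields. The exact formula here is an assumption; the field names mirror `TokenUsage::total_tokens` and `model_context_window` below.

```rust
/// Percentage of the model's context window still available, or None when
/// the context window for the model is unknown.
fn percent_of_context_left(total_tokens: i64, model_context_window: Option<i64>) -> Option<i64> {
    let window = model_context_window?;
    if window <= 0 {
        return None;
    }
    // Clamp so a running sum slightly past the window reports 0, not negative.
    let used = total_tokens.clamp(0, window);
    Some((window - used) * 100 / window)
}

fn main() {
    assert_eq!(percent_of_context_left(32_000, Some(128_000)), Some(75));
    assert_eq!(percent_of_context_left(200_000, Some(128_000)), Some(0));
    assert_eq!(percent_of_context_left(10, None), None);
}
```

Unlike the TypeScript CLI's heuristic estimate, this uses the exact `usage` counts reported by the Responses API.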
|
|
|
|
pub struct TokenUsage {
|
2025-10-06 18:59:37 -07:00
|
|
|
|
#[ts(type = "number")]
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub input_tokens: i64,
|
2025-10-06 18:59:37 -07:00
|
|
|
|
#[ts(type = "number")]
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub cached_input_tokens: i64,
|
2025-10-06 18:59:37 -07:00
|
|
|
|
#[ts(type = "number")]
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub output_tokens: i64,
|
2025-10-06 18:59:37 -07:00
|
|
|
|
#[ts(type = "number")]
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub reasoning_output_tokens: i64,
|
2025-10-06 18:59:37 -07:00
|
|
|
|
#[ts(type = "number")]
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub total_tokens: i64,
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-02 15:20:20 -08:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
2025-09-06 08:19:23 -07:00
|
|
|
|
pub struct TokenUsageInfo {
|
|
|
|
|
|
pub total_token_usage: TokenUsage,
|
|
|
|
|
|
pub last_token_usage: TokenUsage,
|
2026-01-07 10:35:09 -08:00
|
|
|
|
// TODO(aibrahim): make this not optional
|
2025-10-06 18:59:37 -07:00
|
|
|
|
#[ts(type = "number | null")]
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub model_context_window: Option<i64>,
|
2025-09-06 08:19:23 -07:00
|
|
|
|
}
impl TokenUsageInfo {
    pub fn new_or_append(
        info: &Option<TokenUsageInfo>,
        last: &Option<TokenUsage>,
        model_context_window: Option<i64>,
    ) -> Option<Self> {
        if info.is_none() && last.is_none() {
            return None;
        }

        let mut info = match info {
            Some(info) => info.clone(),
            None => Self {
                total_token_usage: TokenUsage::default(),
                last_token_usage: TokenUsage::default(),
                model_context_window,
            },
        };
        if let Some(last) = last {
            info.append_last_usage(last);
        }
        Some(info)
    }

    pub fn append_last_usage(&mut self, last: &TokenUsage) {
        self.total_token_usage.add_assign(last);
        self.last_token_usage = last.clone();
    }

    pub fn fill_to_context_window(&mut self, context_window: i64) {
        let previous_total = self.total_token_usage.total_tokens;
        let delta = (context_window - previous_total).max(0);

        self.model_context_window = Some(context_window);
        self.total_token_usage = TokenUsage {
            total_tokens: context_window,
            ..TokenUsage::default()
        };
        self.last_token_usage = TokenUsage {
            total_tokens: delta,
            ..TokenUsage::default()
        };
    }

    pub fn full_context_window(context_window: i64) -> Self {
        let mut info = Self {
            total_token_usage: TokenUsage::default(),
            last_token_usage: TokenUsage::default(),
            model_context_window: Some(context_window),
        };
        info.fill_to_context_window(context_window);
        info
    }
}
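The accumulate-then-record-last pattern implemented by `new_or_append` / `append_last_usage` above can be sketched in isolation. The `Usage` fields below are simplified stand-ins, not the real `TokenUsage` layout:

```rust
// Minimal, self-contained sketch of the accumulate-and-record-last pattern:
// totals accumulate across turns, while `last` is overwritten each turn.
#[derive(Clone, Default, Debug, PartialEq)]
struct Usage {
    input: i64,
    output: i64,
    total: i64,
}

#[derive(Clone, Default, Debug)]
struct UsageInfo {
    total: Usage,
    last: Usage,
}

impl UsageInfo {
    fn append_last(&mut self, last: &Usage) {
        self.total.input += last.input;
        self.total.output += last.output;
        self.total.total += last.total;
        self.last = last.clone();
    }
}

fn main() {
    let mut info = UsageInfo::default();
    // Two turns: totals accumulate, `last` tracks only the latest turn.
    info.append_last(&Usage { input: 100, output: 20, total: 120 });
    info.append_last(&Usage { input: 200, output: 30, total: 230 });
    assert_eq!(info.total.total, 350);
    assert_eq!(info.last.total, 230);
    println!("ok");
}
```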
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct TokenCountEvent {
    pub info: Option<TokenUsageInfo>,
    pub rate_limits: Option<RateLimitSnapshot>,
}

#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema, TS)]
pub struct RateLimitSnapshot {
    pub primary: Option<RateLimitWindow>,
    pub secondary: Option<RateLimitWindow>,
    pub credits: Option<CreditsSnapshot>,
    pub plan_type: Option<crate::account::PlanType>,
}

#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema, TS)]
pub struct RateLimitWindow {
    /// Percentage (0-100) of the window that has been consumed.
    pub used_percent: f64,
    /// Rolling window duration, in minutes.
    #[ts(type = "number | null")]
    pub window_minutes: Option<i64>,
    /// Unix timestamp (seconds since epoch) when the window resets.
    #[ts(type = "number | null")]
    pub resets_at: Option<i64>,
}

#[derive(Debug, Clone, PartialEq, Deserialize, Serialize, JsonSchema, TS)]
pub struct CreditsSnapshot {
    pub has_credits: bool,
    pub unlimited: bool,
    pub balance: Option<String>,
}

// Includes prompts, tools and space to call compact.
const BASELINE_TOKENS: i64 = 12000;
2025-08-06 15:22:14 -07:00
|
|
|
|
impl TokenUsage {
|
|
|
|
|
|
pub fn is_zero(&self) -> bool {
|
|
|
|
|
|
self.total_tokens == 0
|
|
|
|
|
|
}
|
2025-08-07 01:13:36 -07:00
|
|
|
|
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub fn cached_input(&self) -> i64 {
|
|
|
|
|
|
self.cached_input_tokens.max(0)
|
2025-08-07 01:13:36 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub fn non_cached_input(&self) -> i64 {
|
|
|
|
|
|
(self.input_tokens - self.cached_input()).max(0)
|
2025-08-07 01:13:36 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/// Primary count for display as a single absolute value: non-cached input + output.
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub fn blended_total(&self) -> i64 {
|
|
|
|
|
|
(self.non_cached_input() + self.output_tokens.max(0)).max(0)
|
2025-08-07 01:13:36 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub fn tokens_in_context_window(&self) -> i64 {
|
2025-11-03 10:02:23 -08:00
|
|
|
|
self.total_tokens
|
2025-08-07 01:13:36 -07:00
|
|
|
|
}
|
2025-08-19 08:20:32 -07:00
|
|
|
|
|
|
|
|
|
|
/// Estimate the remaining user-controllable percentage of the model's context window.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// `context_window` is the total size of the model's context window.
|
2025-09-04 16:34:14 -07:00
|
|
|
|
/// `BASELINE_TOKENS` should capture tokens that are always present in
|
2025-08-19 08:20:32 -07:00
|
|
|
|
/// the context (e.g., system prompt and fixed tool instructions) so that
|
|
|
|
|
|
/// the percentage reflects the portion the user can influence.
|
|
|
|
|
|
///
|
|
|
|
|
|
/// This normalizes both the numerator and denominator by subtracting the
|
|
|
|
|
|
/// baseline, so immediately after the first prompt the UI shows 100% left
|
|
|
|
|
|
/// and trends toward 0% as the user fills the effective window.
|
2025-10-20 11:29:49 -07:00
|
|
|
|
pub fn percent_of_context_window_remaining(&self, context_window: i64) -> i64 {
|
2025-09-04 16:34:14 -07:00
|
|
|
|
if context_window <= BASELINE_TOKENS {
|
2025-08-19 08:20:32 -07:00
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-09-04 16:34:14 -07:00
|
|
|
|
let effective_window = context_window - BASELINE_TOKENS;
|
2025-10-20 11:29:49 -07:00
|
|
|
|
let used = (self.tokens_in_context_window() - BASELINE_TOKENS).max(0);
|
|
|
|
|
|
let remaining = (effective_window - used).max(0);
|
|
|
|
|
|
((remaining as f64 / effective_window as f64) * 100.0)
|
|
|
|
|
|
.clamp(0.0, 100.0)
|
|
|
|
|
|
.round() as i64
|
2025-08-19 08:20:32 -07:00
|
|
|
|
}
|
2025-09-06 08:19:23 -07:00
|
|
|
|
|
|
|
|
|
|
/// In-place element-wise sum of token counts.
|
|
|
|
|
|
pub fn add_assign(&mut self, other: &TokenUsage) {
|
|
|
|
|
|
self.input_tokens += other.input_tokens;
|
|
|
|
|
|
self.cached_input_tokens += other.cached_input_tokens;
|
|
|
|
|
|
self.output_tokens += other.output_tokens;
|
|
|
|
|
|
self.reasoning_output_tokens += other.reasoning_output_tokens;
|
|
|
|
|
|
self.total_tokens += other.total_tokens;
|
|
|
|
|
|
}
|
2025-08-06 15:22:14 -07:00
|
|
|
|
}
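The baseline-normalized percentage math above is easy to sanity-check in isolation. A minimal, standalone sketch, assuming a hypothetical 100k-token window and the same 12k baseline:

```rust
// Standalone copy of the baseline-normalized percentage logic from
// `percent_of_context_window_remaining`, taking the raw total directly.
fn percent_remaining(total_tokens: i64, context_window: i64) -> i64 {
    const BASELINE_TOKENS: i64 = 12_000;
    if context_window <= BASELINE_TOKENS {
        return 0;
    }
    let effective_window = context_window - BASELINE_TOKENS;
    let used = (total_tokens - BASELINE_TOKENS).max(0);
    let remaining = (effective_window - used).max(0);
    ((remaining as f64 / effective_window as f64) * 100.0)
        .clamp(0.0, 100.0)
        .round() as i64
}

fn main() {
    // With a 100k window: 12k used (all baseline) leaves 100% of the
    // user-controllable window; 56k used leaves 50%; 100k used leaves 0%.
    assert_eq!(percent_remaining(12_000, 100_000), 100);
    assert_eq!(percent_remaining(56_000, 100_000), 50);
    assert_eq!(percent_remaining(100_000, 100_000), 0);
    println!("ok");
}
```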
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
pub struct FinalOutput {
    pub token_usage: TokenUsage,
}

impl From<TokenUsage> for FinalOutput {
    fn from(token_usage: TokenUsage) -> Self {
        Self { token_usage }
    }
}

impl fmt::Display for FinalOutput {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let token_usage = &self.token_usage;

        write!(
            f,
            "Token usage: total={} input={}{} output={}{}",
            format_with_separators(token_usage.blended_total()),
            format_with_separators(token_usage.non_cached_input()),
            if token_usage.cached_input() > 0 {
                format!(
                    " (+ {} cached)",
                    format_with_separators(token_usage.cached_input())
                )
            } else {
                String::new()
            },
            format_with_separators(token_usage.output_tokens),
            if token_usage.reasoning_output_tokens > 0 {
                format!(
                    " (reasoning {})",
                    format_with_separators(token_usage.reasoning_output_tokens)
                )
            } else {
                String::new()
            }
        )
    }
}
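The `Display` impl above leans on the crate's `format_with_separators` helper, which is defined elsewhere. A sketch of the resulting summary-line shape, with a local stand-in helper that is assumed (not confirmed) to group digits with commas:

```rust
// Stand-in digit-grouping helper so the Display output shape can be shown
// in isolation; the real `format_with_separators` lives elsewhere in the crate.
fn format_with_separators(n: i64) -> String {
    let digits = n.to_string();
    let mut out = String::new();
    for (i, c) in digits.chars().enumerate() {
        // Insert a comma before each remaining group of three digits.
        if i > 0 && (digits.len() - i) % 3 == 0 {
            out.push(',');
        }
        out.push(c);
    }
    out
}

fn main() {
    // Shape of the summary line for a turn with cached input and reasoning tokens.
    let line = format!(
        "Token usage: total={} input={} (+ {} cached) output={} (reasoning {})",
        format_with_separators(1_234_567),
        format_with_separators(1_000_000),
        format_with_separators(234_567),
        format_with_separators(9_000),
        format_with_separators(4_500),
    );
    assert_eq!(
        line,
        "Token usage: total=1,234,567 input=1,000,000 (+ 234,567 cached) output=9,000 (reasoning 4,500)"
    );
    println!("{line}");
}
```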
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentMessageEvent {
    pub message: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct UserMessageEvent {
    pub message: String,
    /// Image URLs sourced from `UserInput::Image`. These are safe
    /// to replay in legacy UI history events and correspond to images sent to
    /// the model.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub images: Option<Vec<String>>,
    /// Local file paths sourced from `UserInput::LocalImage`. These are kept so
    /// the UI can reattach images when editing history, and should not be sent
    /// to the model or treated as API-ready URLs.
    #[serde(default)]
    pub local_images: Vec<std::path::PathBuf>,
    /// UI-defined spans within `message` used to render or persist special elements.
    #[serde(default)]
    pub text_elements: Vec<crate::user_input::TextElement>,
}
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentMessageDeltaEvent {
    pub delta: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentReasoningEvent {
    pub text: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentReasoningRawContentEvent {
    pub text: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentReasoningRawContentDeltaEvent {
    pub delta: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentReasoningSectionBreakEvent {
    // load with default value so it's backward compatible with the old format.
    #[serde(default)]
    pub item_id: String,
    #[serde(default)]
    pub summary_index: i64,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct AgentReasoningDeltaEvent {
    pub delta: String,
}
[MCP] Render MCP tool call result images to the model (#5600)
It's pretty amazing we have gotten here without the ability for the
model to see image content from MCP tool calls.
This PR builds off of 4391 and fixes #4819. I would like @KKcorps to get
adequate credit here, but I also want to get this fix in ASAP; I gave
him a week to update it and haven't gotten a response, so I'm going to
take it across the finish line.
This test highlights how absurd the current situation is. I asked the
model to read this image using the Chrome MCP
<img width="2378" height="674" alt="image"
src="https://github.com/user-attachments/assets/9ef52608-72a2-4423-9f5e-7ae36b2b56e0"
/>
After this change, it correctly outputs:
> Captured the page: image shows a dark terminal-style UI labeled
`OpenAI Codex (v0.0.0)` with prompt `model: gpt-5-codex medium` and
working directory `/codex/codex-rs`
(and more)
Before this change, it said:
> Took the full-page screenshot you asked for. It shows a long,
horizontally repeating pattern of stylized people in orange, light-blue,
and mustard clothing, holding hands in alternating poses against a white
background. No text or other graphics, just rows of flat illustration
stretching off to the right.
Without this change, the Figma, Playwright, Chrome, and other visual MCP
servers are pretty much entirely useless.
I tested this change with the OpenAI Responses API as well as a third
party completions API.
2025-10-27 14:55:57 -07:00
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS, PartialEq)]
pub struct McpInvocation {
    /// Name of the MCP server as defined in the config.
    pub server: String,
    /// Name of the tool as given by the MCP server.
    pub tool: String,
    /// Arguments to the tool call.
    pub arguments: Option<serde_json::Value>,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS, PartialEq)]
pub struct McpToolCallBeginEvent {
    /// Identifier so this can be paired with the McpToolCallEnd event.
    pub call_id: String,
    pub invocation: McpInvocation,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS, PartialEq)]
pub struct McpToolCallEndEvent {
    /// Identifier for the corresponding McpToolCallBegin that finished.
    pub call_id: String,
    pub invocation: McpInvocation,
    #[ts(type = "string")]
    pub duration: Duration,
    /// Result of the tool call. Note this could be an error.
fix: introduce ResponseInputItem::McpToolCallOutput variant (#1151)
The output of an MCP server tool call can be one of several types, but
to date, we treated all outputs as text by showing the serialized JSON
as the "tool output" in Codex:
https://github.com/openai/codex/blob/25a9949c49194d5a64de54a11bcc5b4724ac9bd5/codex-rs/mcp-types/src/lib.rs#L96-L101
This PR adds support for the `ImageContent` variant so we can now
display an image output from an MCP tool call.
In making this change, we introduce a new
`ResponseInputItem::McpToolCallOutput` variant so that we can work with
the `mcp_types::CallToolResult` directly when the function call is made
to an MCP server.
Though arguably the more significant change is the introduction of
`HistoryCell::CompletedMcpToolCallWithImageOutput`, which is a cell that
uses `ratatui_image` to render an image into the terminal. To support
this, we introduce `ImageRenderCache`, cache a
`ratatui_image::picker::Picker`, and `ensure_image_cache()` to cache the
appropriate scaled image data and dimensions based on the current
terminal size.
To test, I created a minimal `package.json`:
```json
{
"name": "kitty-mcp",
"version": "1.0.0",
"type": "module",
"description": "MCP that returns image of kitty",
"main": "index.js",
"dependencies": {
"@modelcontextprotocol/sdk": "^1.12.0"
}
}
```
with the following `index.js` to define the MCP server:
```js
#!/usr/bin/env node
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { readFile } from "node:fs/promises";
import { join } from "node:path";
const IMAGE_URI = "image://Ada.png";
const server = new McpServer({
name: "Demo",
version: "1.0.0",
});
server.tool(
"get-cat-image",
"If you need a cat image, this tool will provide one.",
async () => ({
content: [
{ type: "image", data: await getAdaPngBase64(), mimeType: "image/png" },
],
})
);
server.resource("Ada the Cat", IMAGE_URI, async (uri) => {
const base64Image = await getAdaPngBase64();
return {
contents: [
{
uri: uri.href,
mimeType: "image/png",
blob: base64Image,
},
],
};
});
async function getAdaPngBase64() {
const __dirname = new URL(".", import.meta.url).pathname;
// From https://github.com/benjajaja/ratatui-image/blob/9705ce2c59ec669abbce2924cbfd1f5ae22c9860/assets/Ada.png
const filePath = join(__dirname, "Ada.png");
const imageData = await readFile(filePath);
const base64Image = imageData.toString("base64");
return base64Image;
}
const transport = new StdioServerTransport();
await server.connect(transport);
```
With the local changes from this PR, I added the following to my
`config.toml`:
```toml
[mcp_servers.kitty]
command = "node"
args = ["/Users/mbolin/code/kitty-mcp/index.js"]
```
Running the TUI from source:
```
cargo run --bin codex -- --model o3 'I need a picture of a cat'
```
I get:
<img width="732" alt="image"
src="https://github.com/user-attachments/assets/bf80b721-9ca0-4d81-aec7-77d6899e2869"
/>
Now, that said, I have only tested in iTerm and there is definitely some
funny business with getting an accurate character-to-pixel ratio
(sometimes the `CompletedMcpToolCallWithImageOutput` thinks it needs 10
rows to render instead of 4), so there is still work to be done here.
2025-05-28 19:03:17 -07:00
    pub result: Result<CallToolResult, String>,
}

impl McpToolCallEndEvent {
    pub fn is_success(&self) -> bool {
        match &self.result {
            Ok(result) => !result.is_error.unwrap_or(false),
            Err(_) => false,
        }
    }
}
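The success check above has two failure paths: a transport-level failure (`Err`) and a protocol-level failure, where the MCP server returned a result whose `is_error` flag is set. A self-contained sketch with a simplified stand-in for `CallToolResult`:

```rust
// Simplified stand-in for mcp_types::CallToolResult; only the field the
// success check cares about is modeled here.
struct ToolResult {
    is_error: Option<bool>,
}

// An MCP tool call succeeds only if the call itself completed (Ok) and the
// server did not flag the result as an error (is_error absent or false).
fn is_success(result: &Result<ToolResult, String>) -> bool {
    match result {
        Ok(r) => !r.is_error.unwrap_or(false),
        Err(_) => false,
    }
}

fn main() {
    assert!(is_success(&Ok(ToolResult { is_error: None })));
    assert!(is_success(&Ok(ToolResult { is_error: Some(false) })));
    assert!(!is_success(&Ok(ToolResult { is_error: Some(true) })));
    assert!(!is_success(&Err("transport failure".to_string())));
    println!("ok");
}
```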
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct WebSearchBeginEvent {
    pub call_id: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct WebSearchEndEvent {
    pub call_id: String,
    pub query: String,
    pub action: WebSearchAction,
}
// "Conversation" naming kept for backward compatibility.
/// Response payload for `Op::GetHistory` containing the current session's
/// in-memory transcript.
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ConversationPathResponseEvent {
    pub conversation_id: ThreadId,
    pub path: PathBuf,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ResumedHistory {
    pub conversation_id: ThreadId,
    pub history: Vec<RolloutItem>,
    pub rollout_path: PathBuf,
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-09-10 10:17:24 -07:00
|
|
|
|
pub enum InitialHistory {
|
|
|
|
|
|
New,
|
|
|
|
|
|
Resumed(ResumedHistory),
|
|
|
|
|
|
Forked(Vec<RolloutItem>),
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl InitialHistory {
|
2026-01-16 13:41:46 -08:00
|
|
|
|
pub fn forked_from_id(&self) -> Option<ThreadId> {
|
|
|
|
|
|
match self {
|
|
|
|
|
|
InitialHistory::New => None,
|
|
|
|
|
|
InitialHistory::Resumed(resumed) => {
|
|
|
|
|
|
resumed.history.iter().find_map(|item| match item {
|
|
|
|
|
|
RolloutItem::SessionMeta(meta_line) => meta_line.meta.forked_from_id,
|
|
|
|
|
|
_ => None,
|
|
|
|
|
|
})
|
|
|
|
|
|
}
|
|
|
|
|
|
InitialHistory::Forked(items) => items.iter().find_map(|item| match item {
|
|
|
|
|
|
RolloutItem::SessionMeta(meta_line) => Some(meta_line.meta.id),
|
|
|
|
|
|
_ => None,
|
|
|
|
|
|
}),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-21 14:21:48 -08:00
|
|
|
|
pub fn session_cwd(&self) -> Option<PathBuf> {
|
|
|
|
|
|
match self {
|
|
|
|
|
|
InitialHistory::New => None,
|
|
|
|
|
|
InitialHistory::Resumed(resumed) => session_cwd_from_items(&resumed.history),
|
|
|
|
|
|
InitialHistory::Forked(items) => session_cwd_from_items(items),
|
|
|
|
|
|
}
|
|
|
|
|
|
}
    pub fn get_rollout_items(&self) -> Vec<RolloutItem> {
        match self {
            InitialHistory::New => Vec::new(),
            InitialHistory::Resumed(resumed) => resumed.history.clone(),
            InitialHistory::Forked(items) => items.clone(),
        }
    }

    pub fn get_event_msgs(&self) -> Option<Vec<EventMsg>> {
        match self {
            InitialHistory::New => None,
            InitialHistory::Resumed(resumed) => Some(
                resumed
                    .history
                    .iter()
                    .filter_map(|ri| match ri {
                        RolloutItem::EventMsg(ev) => Some(ev.clone()),
                        _ => None,
                    })
                    .collect(),
            ),
            InitialHistory::Forked(items) => Some(
                items
                    .iter()
                    .filter_map(|ri| match ri {
                        RolloutItem::EventMsg(ev) => Some(ev.clone()),
                        _ => None,
                    })
                    .collect(),
            ),
        }
    }

    pub fn get_base_instructions(&self) -> Option<BaseInstructions> {
        // TODO: SessionMeta should (in theory) always be first in the history, so we can
        // probably check only the first item?
        match self {
            InitialHistory::New => None,
            InitialHistory::Resumed(resumed) => {
                resumed.history.iter().find_map(|item| match item {
                    RolloutItem::SessionMeta(meta_line) => meta_line.meta.base_instructions.clone(),
                    _ => None,
                })
            }
            InitialHistory::Forked(items) => items.iter().find_map(|item| match item {
                RolloutItem::SessionMeta(meta_line) => meta_line.meta.base_instructions.clone(),
                _ => None,
            }),
        }
    }
}

fn session_cwd_from_items(items: &[RolloutItem]) -> Option<PathBuf> {
    items.iter().find_map(|item| match item {
        RolloutItem::SessionMeta(meta_line) => Some(meta_line.meta.cwd.clone()),
        _ => None,
    })
}

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema, TS, Default)]
#[serde(rename_all = "lowercase")]
#[ts(rename_all = "lowercase")]
pub enum SessionSource {
    Cli,
    #[default]
    VSCode,
    Exec,
    Mcp,
    SubAgent(SubAgentSource),
    #[serde(other)]
    Unknown,
}

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
#[ts(rename_all = "snake_case")]
pub enum SubAgentSource {
    Review,
    Compact,
    ThreadSpawn {
        parent_thread_id: ThreadId,
        depth: i32,
    },
    Other(String),
}

impl fmt::Display for SessionSource {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SessionSource::Cli => f.write_str("cli"),
            SessionSource::VSCode => f.write_str("vscode"),
            SessionSource::Exec => f.write_str("exec"),
            SessionSource::Mcp => f.write_str("mcp"),
            SessionSource::SubAgent(sub_source) => write!(f, "subagent_{sub_source}"),
            SessionSource::Unknown => f.write_str("unknown"),
        }
    }
}

impl fmt::Display for SubAgentSource {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SubAgentSource::Review => f.write_str("review"),
            SubAgentSource::Compact => f.write_str("compact"),
            SubAgentSource::ThreadSpawn {
                parent_thread_id,
                depth,
            } => {
                write!(f, "thread_spawn_{parent_thread_id}_d{depth}")
            }
            SubAgentSource::Other(other) => f.write_str(other),
        }
    }
}
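The two `Display` impls compose: a sub-agent session renders as `subagent_` followed by the sub-agent source's own rendering. A self-contained sketch with hypothetical simplified mirrors of the enums (real `ThreadId` replaced by `String` for illustration):

```rust
use std::fmt;

// Hypothetical simplified mirrors of SessionSource / SubAgentSource.
enum SessionSource {
    Cli,
    SubAgent(SubAgentSource),
}

enum SubAgentSource {
    Review,
    // ThreadId stands in as a String here for illustration.
    ThreadSpawn { parent_thread_id: String, depth: i32 },
}

impl fmt::Display for SubAgentSource {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SubAgentSource::Review => f.write_str("review"),
            SubAgentSource::ThreadSpawn { parent_thread_id, depth } => {
                write!(f, "thread_spawn_{parent_thread_id}_d{depth}")
            }
        }
    }
}

impl fmt::Display for SessionSource {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            SessionSource::Cli => f.write_str("cli"),
            // Sub-agent sources get a "subagent_" prefix.
            SessionSource::SubAgent(sub) => write!(f, "subagent_{sub}"),
        }
    }
}

fn main() {
    assert_eq!(SessionSource::Cli.to_string(), "cli");
    assert_eq!(
        SessionSource::SubAgent(SubAgentSource::Review).to_string(),
        "subagent_review"
    );
    assert_eq!(
        SessionSource::SubAgent(SubAgentSource::ThreadSpawn {
            parent_thread_id: "t-123".to_string(),
            depth: 2,
        })
        .to_string(),
        "subagent_thread_spawn_t-123_d2"
    );
}
```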

/// SessionMeta contains session-level data that doesn't correspond to a specific turn.
///
/// NOTE: There used to be an `instructions` field here, which stored user_instructions, but we
/// now save that on TurnContext. base_instructions stores the base instructions for the session,
/// and should be used when there is no config override.
#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)]
pub struct SessionMeta {
    pub id: ThreadId,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub forked_from_id: Option<ThreadId>,
    pub timestamp: String,
    pub cwd: PathBuf,
    pub originator: String,
    pub cli_version: String,
    #[serde(default)]
    pub source: SessionSource,
    pub model_provider: Option<String>,
    /// base_instructions for the session. This *should* always be present when creating a new
    /// session, but may be missing for older sessions. If not present, fall back to rendering
    /// the base_instructions from ModelsManager.
    pub base_instructions: Option<BaseInstructions>,
}

impl Default for SessionMeta {
    fn default() -> Self {
        SessionMeta {
            id: ThreadId::default(),
            forked_from_id: None,
            timestamp: String::new(),
            cwd: PathBuf::new(),
            originator: String::new(),
            cli_version: String::new(),
            source: SessionSource::default(),
            model_provider: None,
            base_instructions: None,
        }
    }
}

#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)]
pub struct SessionMetaLine {
    #[serde(flatten)]
    pub meta: SessionMeta,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub git: Option<GitInfo>,
}

#[derive(Serialize, Deserialize, Debug, Clone, JsonSchema, TS)]
#[serde(tag = "type", content = "payload", rename_all = "snake_case")]
pub enum RolloutItem {
    SessionMeta(SessionMetaLine),
    ResponseItem(ResponseItem),
    Compacted(CompactedItem),
    TurnContext(TurnContextItem),
    EventMsg(EventMsg),
}

#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)]
pub struct CompactedItem {
    pub message: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub replacement_history: Option<Vec<ResponseItem>>,
}

impl From<CompactedItem> for ResponseItem {
    fn from(value: CompactedItem) -> Self {
        ResponseItem::Message {
            id: None,
            role: "assistant".to_string(),
            content: vec![ContentItem::OutputText {
                text: value.message,
            }],
            end_turn: None,
        }
    }
}

#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)]
pub struct TurnContextItem {
    pub cwd: PathBuf,
    pub approval_policy: AskForApproval,
    pub sandbox_policy: SandboxPolicy,
    pub model: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub personality: Option<Personality>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub collaboration_mode: Option<CollaborationMode>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub effort: Option<ReasoningEffortConfig>,
    pub summary: ReasoningSummaryConfig,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub user_instructions: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub developer_instructions: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub final_output_json_schema: Option<Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub truncation_policy: Option<TruncationPolicy>,
}

#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(tag = "mode", content = "limit", rename_all = "snake_case")]
pub enum TruncationPolicy {
    Bytes(usize),
    Tokens(usize),
}
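With `#[serde(tag = "mode", content = "limit", rename_all = "snake_case")]`, serde uses the adjacently tagged representation: the variant name lands under the `"mode"` key and the payload under `"limit"`, e.g. `{"mode":"bytes","limit":10000}`. A dependency-free, hand-rolled sketch of that mapping (not the actual serde machinery):

```rust
// Hand-rolled sketch of the adjacently tagged JSON shape produced by
// #[serde(tag = "mode", content = "limit", rename_all = "snake_case")].
enum TruncationPolicy {
    Bytes(usize),
    Tokens(usize),
}

fn to_json(policy: &TruncationPolicy) -> String {
    // Variant name becomes the "mode" tag; the payload becomes "limit".
    let (mode, limit) = match policy {
        TruncationPolicy::Bytes(n) => ("bytes", n),
        TruncationPolicy::Tokens(n) => ("tokens", n),
    };
    format!(r#"{{"mode":"{mode}","limit":{limit}}}"#)
}

fn main() {
    assert_eq!(
        to_json(&TruncationPolicy::Bytes(10_000)),
        r#"{"mode":"bytes","limit":10000}"#
    );
    assert_eq!(
        to_json(&TruncationPolicy::Tokens(4096)),
        r#"{"mode":"tokens","limit":4096}"#
    );
}
```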

#[derive(Serialize, Deserialize, Clone, JsonSchema)]
pub struct RolloutLine {
    pub timestamp: String,
    #[serde(flatten)]
    pub item: RolloutItem,
}

#[derive(Serialize, Deserialize, Clone, Debug, JsonSchema, TS)]
pub struct GitInfo {
    /// Current commit hash (SHA)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub commit_hash: Option<String>,
    /// Current branch name
    #[serde(skip_serializing_if = "Option::is_none")]
    pub branch: Option<String>,
    /// Repository URL (if available from remote)
    #[serde(skip_serializing_if = "Option::is_none")]
    pub repository_url: Option<String>,
}

#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum ReviewDelivery {
    Inline,
    Detached,
}

#[derive(Serialize, Deserialize, Clone, Debug, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "camelCase")]
#[ts(tag = "type")]
pub enum ReviewTarget {
    /// Review the working tree: staged, unstaged, and untracked files.
    UncommittedChanges,

    /// Review changes between the current branch and the given base branch.
    #[serde(rename_all = "camelCase")]
    #[ts(rename_all = "camelCase")]
    BaseBranch { branch: String },

    /// Review the changes introduced by a specific commit.
    #[serde(rename_all = "camelCase")]
    #[ts(rename_all = "camelCase")]
    Commit {
        sha: String,
        /// Optional human-readable label (e.g., commit subject) for UIs.
        title: Option<String>,
    },

    /// Arbitrary instructions provided by the user.
    #[serde(rename_all = "camelCase")]
    #[ts(rename_all = "camelCase")]
    Custom { instructions: String },
}

/// Review request sent to the review session.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct ReviewRequest {
    pub target: ReviewTarget,
    #[serde(skip_serializing_if = "Option::is_none")]
    #[ts(optional)]
    pub user_facing_hint: Option<String>,
}

/// Structured review result produced by a child review session.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct ReviewOutputEvent {
    pub findings: Vec<ReviewFinding>,
    pub overall_correctness: String,
    pub overall_explanation: String,
    pub overall_confidence_score: f32,
}

impl Default for ReviewOutputEvent {
    fn default() -> Self {
        Self {
            findings: Vec::new(),
            overall_correctness: String::default(),
            overall_explanation: String::default(),
            overall_confidence_score: 0.0,
        }
    }
}

/// A single review finding describing an observed issue or recommendation.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct ReviewFinding {
    pub title: String,
    pub body: String,
    pub confidence_score: f32,
    pub priority: i32,
    pub code_location: ReviewCodeLocation,
}

/// Location of the code related to a review finding.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct ReviewCodeLocation {
    pub absolute_file_path: PathBuf,
    pub line_range: ReviewLineRange,
}

/// Inclusive line range in a file associated with the finding.
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct ReviewLineRange {
    pub start: u32,
    pub end: u32,
}

#[derive(
    Debug, Clone, Copy, Display, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS, Default,
)]
#[serde(rename_all = "snake_case")]
pub enum ExecCommandSource {
    #[default]
    Agent,
    UserShell,
    UnifiedExecStartup,
    UnifiedExecInteraction,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct ExecCommandBeginEvent {
    /// Identifier so this can be paired with the ExecCommandEnd event.
    pub call_id: String,
    /// Identifier for the underlying PTY process (when available).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[ts(optional)]
    pub process_id: Option<String>,
    /// Turn ID that this command belongs to.
    pub turn_id: String,
    /// The command to be executed.
    pub command: Vec<String>,
    /// The command's working directory if not the default cwd for the agent.
    pub cwd: PathBuf,
    pub parsed_cmd: Vec<ParsedCommand>,
    /// Where the command originated. Defaults to Agent for backward compatibility.
    #[serde(default)]
    pub source: ExecCommandSource,
    /// Raw input sent to a unified exec session (if this is an interaction event).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[ts(optional)]
    pub interaction_input: Option<String>,
}

|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-05-13 20:44:42 -07:00
|
|
|
|
pub struct ExecCommandEndEvent {
|
|
|
|
|
|
/// Identifier for the ExecCommandBegin that finished.
|
|
|
|
|
|
pub call_id: String,
|
2025-11-25 22:21:05 +00:00
|
|
|
|
/// Identifier for the underlying PTY process (when available).
|
|
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
#[ts(optional)]
|
|
|
|
|
|
pub process_id: Option<String>,
|
[app-server] feat: add v2 command execution approval flow (#6758)
This PR adds the API V2 version of the command‑execution approval flow
for the shell tool.
This PR wires the new RPC (`item/commandExecution/requestApproval`, V2
only) and related events (`item/started`, `item/completed`, and
`item/commandExecution/delta`, which are emitted in both V1 and V2)
through the app-server
protocol. The new approval RPC is only sent when the user initiates a
turn with the new `turn/start` API so we don't break backwards
compatibility with VSCE.
The approach I took was to make as few changes to the Codex core as
possible, leveraging existing `EventMsg` core events, and translating
those in app-server. I did have to add additional fields to
`EventMsg::ExecCommandEndEvent` to capture the command's input so that
app-server can statelessly transform these events to a
`ThreadItem::CommandExecution` item for the `item/completed` event.
Once we stabilize the API and it's complete enough for our partners, we
can work on migrating the core to be aware of command execution items as
a first-class concept.
**Note**: We'll need followup work to make sure these APIs work for the
unified exec tool, but will wait til that's stable and landed before
doing a pass on app-server.
Example payloads below:
```
{
"method": "item/started",
"params": {
"item": {
"aggregatedOutput": null,
"command": "/bin/zsh -lc 'touch /tmp/should-trigger-approval'",
"cwd": "/Users/owen/repos/codex/codex-rs",
"durationMs": null,
"exitCode": null,
"id": "call_lNWWsbXl1e47qNaYjFRs0dyU",
"parsedCmd": [
{
"cmd": "touch /tmp/should-trigger-approval",
"type": "unknown"
}
],
"status": "inProgress",
"type": "commandExecution"
}
}
}
```
```
{
"id": 0,
"method": "item/commandExecution/requestApproval",
"params": {
"itemId": "call_lNWWsbXl1e47qNaYjFRs0dyU",
"parsedCmd": [
{
"cmd": "touch /tmp/should-trigger-approval",
"type": "unknown"
}
],
"reason": "Need to create file in /tmp which is outside workspace sandbox",
"risk": null,
"threadId": "019a93e8-0a52-7fe3-9808-b6bc40c0989a",
"turnId": "1"
}
}
```
```
{
"id": 0,
"result": {
"acceptSettings": {
"forSession": false
},
"decision": "accept"
}
}
```
```
{
"params": {
"item": {
"aggregatedOutput": null,
"command": "/bin/zsh -lc 'touch /tmp/should-trigger-approval'",
"cwd": "/Users/owen/repos/codex/codex-rs",
"durationMs": 224,
"exitCode": 0,
"id": "call_lNWWsbXl1e47qNaYjFRs0dyU",
"parsedCmd": [
{
"cmd": "touch /tmp/should-trigger-approval",
"type": "unknown"
}
],
"status": "completed",
"type": "commandExecution"
}
}
}
```
2025-11-17 16:23:54 -08:00
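The `item/started` → `item/completed` payloads above imply a simple lifecycle for a `commandExecution` item: it begins as `inProgress` with no exit code or duration, and completion fills both in. A minimal sketch of that transition, with a struct that mirrors only the JSON fields shown (the real `ThreadItem::CommandExecution` has more):

```rust
// Illustrative lifecycle for the commandExecution item in the example
// payloads: started => status "inProgress", no exitCode/durationMs;
// completed => both populated. The struct is a sketch, not the real type.
#[derive(Debug, PartialEq)]
struct CommandExecutionItem {
    id: String,
    status: &'static str,
    exit_code: Option<i32>,
    duration_ms: Option<u64>,
}

fn started(id: &str) -> CommandExecutionItem {
    CommandExecutionItem {
        id: id.to_string(),
        status: "inProgress",
        exit_code: None,
        duration_ms: None,
    }
}

fn completed(mut item: CommandExecutionItem, exit_code: i32, duration_ms: u64) -> CommandExecutionItem {
    item.status = "completed";
    item.exit_code = Some(exit_code);
    item.duration_ms = Some(duration_ms);
    item
}

fn main() {
    let item = started("call_lNWWsbXl1e47qNaYjFRs0dyU");
    assert_eq!(item.status, "inProgress");
    assert_eq!(item.exit_code, None);

    let done = completed(item, 0, 224);
    assert_eq!(done.status, "completed");
    assert_eq!(done.exit_code, Some(0));
    assert_eq!(done.duration_ms, Some(224));
}
```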
|
|
|
|
/// Turn ID that this command belongs to.
|
|
|
|
|
|
pub turn_id: String,
|
|
|
|
|
|
/// The command that was executed.
|
|
|
|
|
|
pub command: Vec<String>,
|
|
|
|
|
|
/// The command's working directory if not the default cwd for the agent.
|
|
|
|
|
|
pub cwd: PathBuf,
|
|
|
|
|
|
pub parsed_cmd: Vec<ParsedCommand>,
|
|
|
|
|
|
/// Where the command originated. Defaults to Agent for backward compatibility.
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub source: ExecCommandSource,
|
|
|
|
|
|
/// Raw input sent to a unified exec session (if this is an interaction event).
|
|
|
|
|
|
#[serde(default, skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
#[ts(optional)]
|
|
|
|
|
|
pub interaction_input: Option<String>,
|
|
|
|
|
|
|
2025-05-13 20:44:42 -07:00
|
|
|
|
/// Captured stdout
|
|
|
|
|
|
pub stdout: String,
|
|
|
|
|
|
/// Captured stderr
|
|
|
|
|
|
pub stderr: String,
|
2025-08-23 09:54:31 -07:00
|
|
|
|
/// Captured aggregated output
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub aggregated_output: String,
|
2025-05-13 20:44:42 -07:00
|
|
|
|
/// The command's exit code.
|
|
|
|
|
|
pub exit_code: i32,
|
2025-08-03 11:33:44 -07:00
|
|
|
|
/// The duration of the command execution.
|
2025-09-08 14:54:47 -07:00
|
|
|
|
#[ts(type = "string")]
|
2025-08-03 11:33:44 -07:00
|
|
|
|
pub duration: Duration,
|
2025-08-22 16:32:31 -07:00
|
|
|
|
/// Formatted output from the command, as seen by the model.
|
|
|
|
|
|
pub formatted_output: String,
|
2025-05-13 20:44:42 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-10-02 11:36:03 -07:00
|
|
|
|
pub struct ViewImageToolCallEvent {
|
|
|
|
|
|
/// Identifier for the originating tool call.
|
|
|
|
|
|
pub call_id: String,
|
|
|
|
|
|
/// Local filesystem path provided to the tool.
|
|
|
|
|
|
pub path: PathBuf,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
|
2025-08-01 14:00:19 -07:00
|
|
|
|
#[serde(rename_all = "snake_case")]
|
|
|
|
|
|
pub enum ExecOutputStream {
|
|
|
|
|
|
Stdout,
|
|
|
|
|
|
Stderr,
|
2025-08-01 13:04:34 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-09-04 08:21:58 -07:00
|
|
|
|
#[serde_as]
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
|
2025-08-01 14:00:19 -07:00
|
|
|
|
pub struct ExecCommandOutputDeltaEvent {
|
2025-08-01 13:04:34 -07:00
|
|
|
|
/// Identifier for the ExecCommandBegin that produced this chunk.
|
|
|
|
|
|
pub call_id: String,
|
2025-08-01 14:00:19 -07:00
|
|
|
|
/// Which stream produced this chunk.
|
|
|
|
|
|
pub stream: ExecOutputStream,
|
|
|
|
|
|
/// Raw bytes from the stream (may not be valid UTF-8).
|
2025-09-04 10:38:00 -07:00
|
|
|
|
#[serde_as(as = "serde_with::base64::Base64")]
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[schemars(with = "String")]
|
2025-09-08 14:54:47 -07:00
|
|
|
|
#[ts(type = "string")]
|
2025-09-04 08:21:58 -07:00
|
|
|
|
pub chunk: Vec<u8>,
|
2025-08-01 13:04:34 -07:00
|
|
|
|
}
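The "may not be valid UTF-8" caveat on `chunk` matters in practice: a multibyte character can be split across two delta events, so a client that decodes each chunk independently will mangle output. A small stdlib-only demonstration of the failure mode and the fix (buffer raw bytes, decode once):

```rust
// Why ExecCommandOutputDeltaEvent carries raw bytes: a UTF-8 sequence
// can straddle a chunk boundary. Decoding per-chunk yields replacement
// characters; buffering the bytes before decoding recovers the text.
fn main() {
    let emoji: &[u8] = "é".as_bytes(); // two bytes: [0xC3, 0xA9]
    let (first, second) = emoji.split_at(1);

    // Decoding the truncated first chunk alone produces U+FFFD.
    assert_eq!(String::from_utf8_lossy(first), "\u{FFFD}");

    // Buffering both chunks and decoding once succeeds.
    let mut buf = Vec::new();
    buf.extend_from_slice(first);
    buf.extend_from_slice(second);
    assert_eq!(String::from_utf8(buf).unwrap(), "é");
}
```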
|
|
|
|
|
|
|
2025-12-10 10:30:38 +00:00
|
|
|
|
#[serde_as]
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
|
|
|
|
|
|
pub struct TerminalInteractionEvent {
|
|
|
|
|
|
    /// Identifier for the ExecCommandBegin this interaction belongs to.
|
|
|
|
|
|
pub call_id: String,
|
|
|
|
|
|
/// Process id associated with the running command.
|
|
|
|
|
|
pub process_id: String,
|
|
|
|
|
|
/// Stdin sent to the running session.
|
|
|
|
|
|
pub stdin: String,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-05-13 20:44:42 -07:00
|
|
|
|
pub struct BackgroundEventEvent {
|
|
|
|
|
|
pub message: String,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-29 12:29:28 +00:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct DeprecationNoticeEvent {
|
|
|
|
|
|
/// Concise summary of what is deprecated.
|
|
|
|
|
|
pub summary: String,
|
|
|
|
|
|
/// Optional extra guidance, such as migration steps or rationale.
|
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
pub details: Option<String>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-27 10:55:29 +00:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct UndoStartedEvent {
|
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
pub message: Option<String>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct UndoCompletedEvent {
|
|
|
|
|
|
pub success: bool,
|
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
pub message: Option<String>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2026-01-06 13:23:48 -08:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct ThreadRolledBackEvent {
|
|
|
|
|
|
/// Number of user turns that were removed from context.
|
|
|
|
|
|
pub num_turns: u32,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-08-21 01:15:24 -07:00
|
|
|
|
pub struct StreamErrorEvent {
|
|
|
|
|
|
pub message: String,
|
[app-server & core] introduce new codex error code and v2 app-server error events (#6938)
This PR does two things:
1. Populate a new `codex_error_code` field in error events sent from
core to the client;
2. Old v1 core events `codex/event/stream_error` and `codex/event/error`
both become `error`. We also surface the codex error code when a
`turn/completed` event reports a failed status.
new events in app server test:
```
< {
< "method": "codex/event/stream_error",
< "params": {
< "conversationId": "019aa34c-0c14-70e0-9706-98520a760d67",
< "id": "0",
< "msg": {
< "codex_error_code": {
< "response_stream_disconnected": {
< "http_status_code": 401
< }
< },
< "message": "Reconnecting... 2/5",
< "type": "stream_error"
< }
< }
< }
{
< "method": "error",
< "params": {
< "error": {
< "codexErrorCode": {
< "responseStreamDisconnected": {
< "httpStatusCode": 401
< }
< },
< "message": "Reconnecting... 2/5"
< }
< }
< }
< {
< "method": "turn/completed",
< "params": {
< "turn": {
< "error": {
< "codexErrorCode": {
< "responseTooManyFailedAttempts": {
< "httpStatusCode": 401
< }
< },
< "message": "exceeded retry limit, last status: 401 Unauthorized, request id: 9a1b495a1a97ed3e-SJC"
< },
< "id": "0",
< "items": [],
< "status": "failed"
< }
< }
< }
```
2025-11-20 15:06:55 -08:00
|
|
|
|
#[serde(default)]
|
2025-11-20 17:02:37 -08:00
|
|
|
|
pub codex_error_info: Option<CodexErrorInfo>,
|
2025-12-24 10:07:38 -08:00
|
|
|
|
/// Optional details about the underlying stream failure (often the same
|
|
|
|
|
|
/// human-readable message that is surfaced as the terminal error if retries
|
|
|
|
|
|
/// are exhausted).
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub additional_details: Option<String>,
|
2025-08-21 01:15:24 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-10-10 10:07:14 +01:00
|
|
|
|
pub struct StreamInfoEvent {
|
|
|
|
|
|
pub message: String,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-05-13 20:44:42 -07:00
|
|
|
|
pub struct PatchApplyBeginEvent {
|
|
|
|
|
|
/// Identifier so this can be paired with the PatchApplyEnd event.
|
|
|
|
|
|
pub call_id: String,
|
[app-server] feat: v2 apply_patch approval flow (#6760)
This PR adds the API V2 version of the apply_patch approval flow, which
centers around `ThreadItem::FileChange`.
This PR wires the new RPC (`item/fileChange/requestApproval`, V2 only)
and related events (`item/started`, `item/completed` for
`ThreadItem::FileChange`, which are emitted in both V1 and V2) through
the app-server
protocol. The new approval RPC is only sent when the user initiates a
turn with the new `turn/start` API so we don't break backwards
compatibility with VSCE.
Similar to https://github.com/openai/codex/pull/6758, the approach I
took was to make as few changes to the Codex core as possible,
leveraging existing `EventMsg` core events, and translating those in
app-server. I did have to add a few additional fields to
`EventMsg::PatchApplyBegin` and `EventMsg::PatchApplyEnd`, but those
were fairly lightweight.
However, the `EventMsg`s emitted by core are the following:
```
1) Auto-approved (no request for approval)
- EventMsg::PatchApplyBegin
- EventMsg::PatchApplyEnd
2) Approved by user
- EventMsg::ApplyPatchApprovalRequest
- EventMsg::PatchApplyBegin
- EventMsg::PatchApplyEnd
3) Declined by user
- EventMsg::ApplyPatchApprovalRequest
- EventMsg::PatchApplyBegin
- EventMsg::PatchApplyEnd
```
For a request triggering an approval, this would result in:
```
item/fileChange/requestApproval
item/started
item/completed
```
which is different from the `ThreadItem::CommandExecution` flow
introduced in https://github.com/openai/codex/pull/6758, which does the
below and is preferable:
```
item/started
item/commandExecution/requestApproval
item/completed
```
To fix this, we leverage `TurnSummaryStore` on codex_message_processor
to store a little bit of state, allowing us to fire `item/started` and
`item/fileChange/requestApproval` whenever we receive the underlying
`EventMsg::ApplyPatchApprovalRequest`, and no-oping when we receive the
`EventMsg::PatchApplyBegin` later.
This is much less invasive than modifying the order of EventMsg within
core (I tried).
The resulting payloads:
```
{
"method": "item/started",
"params": {
"item": {
"changes": [
{
"diff": "Hello from Codex!\n",
"kind": "add",
"path": "/Users/owen/repos/codex/codex-rs/APPROVAL_DEMO.txt"
}
],
"id": "call_Nxnwj7B3YXigfV6Mwh03d686",
"status": "inProgress",
"type": "fileChange"
}
}
}
```
```
{
"id": 0,
"method": "item/fileChange/requestApproval",
"params": {
"grantRoot": null,
"itemId": "call_Nxnwj7B3YXigfV6Mwh03d686",
"reason": null,
"threadId": "019a9e11-8295-7883-a283-779e06502c6f",
"turnId": "1"
}
}
```
```
{
"id": 0,
"result": {
"decision": "accept"
}
}
```
```
{
"method": "item/completed",
"params": {
"item": {
"changes": [
{
"diff": "Hello from Codex!\n",
"kind": "add",
"path": "/Users/owen/repos/codex/codex-rs/APPROVAL_DEMO.txt"
}
],
"id": "call_Nxnwj7B3YXigfV6Mwh03d686",
"status": "completed",
"type": "fileChange"
}
}
}
```
2025-11-19 20:13:31 -08:00
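The `TurnSummaryStore` trick described above can be reduced to a set of call ids: emit `item/started` on whichever event sees a call id first, and no-op on the second sighting. A minimal sketch with illustrative names (the real store tracks more state than this):

```rust
use std::collections::HashSet;

// Sketch of the de-duplication described in the commit message: fire
// item/started when ApplyPatchApprovalRequest arrives, remember the
// call id, and skip the duplicate when PatchApplyBegin shows up later.
// For auto-approved patches, PatchApplyBegin is the first sighting.
#[derive(Default)]
struct TurnSummary {
    started_file_changes: HashSet<String>,
}

impl TurnSummary {
    /// Returns true if `item/started` should be emitted now.
    fn on_approval_request(&mut self, call_id: &str) -> bool {
        self.started_file_changes.insert(call_id.to_string())
    }

    /// Returns true if PatchApplyBegin should still emit `item/started`
    /// (i.e. no approval request preceded it for this call id).
    fn on_patch_apply_begin(&mut self, call_id: &str) -> bool {
        self.started_file_changes.insert(call_id.to_string())
    }
}

fn main() {
    let mut turn = TurnSummary::default();
    // Approval path: started fires with the request, begin is a no-op.
    assert!(turn.on_approval_request("call_1"));
    assert!(!turn.on_patch_apply_begin("call_1"));
    // Auto-approved path: begin is the first sighting, so started fires.
    assert!(turn.on_patch_apply_begin("call_2"));
}
```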
|
|
|
|
/// Turn ID that this patch belongs to.
|
|
|
|
|
|
/// Uses `#[serde(default)]` for backwards compatibility.
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub turn_id: String,
|
2025-05-13 20:44:42 -07:00
|
|
|
|
/// If true, there was no ApplyPatchApprovalRequest for this patch.
|
|
|
|
|
|
pub auto_approved: bool,
|
|
|
|
|
|
/// The changes to be applied.
|
|
|
|
|
|
pub changes: HashMap<PathBuf, FileChange>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-05-13 20:44:42 -07:00
|
|
|
|
pub struct PatchApplyEndEvent {
|
|
|
|
|
|
/// Identifier for the PatchApplyBegin that finished.
|
|
|
|
|
|
pub call_id: String,
|
[app-server] feat: v2 apply_patch approval flow (#6760)
2025-11-19 20:13:31 -08:00
|
|
|
|
/// Turn ID that this patch belongs to.
|
|
|
|
|
|
/// Uses `#[serde(default)]` for backwards compatibility.
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub turn_id: String,
|
2025-05-13 20:44:42 -07:00
|
|
|
|
/// Captured stdout (summary printed by apply_patch).
|
|
|
|
|
|
pub stdout: String,
|
|
|
|
|
|
/// Captured stderr (parser errors, IO failures, etc.).
|
|
|
|
|
|
pub stderr: String,
|
|
|
|
|
|
/// Whether the patch was applied successfully.
|
|
|
|
|
|
pub success: bool,
|
[app-server] feat: v2 apply_patch approval flow (#6760)
2025-11-19 20:13:31 -08:00
|
|
|
|
/// The changes that were applied (mirrors PatchApplyBeginEvent::changes).
|
|
|
|
|
|
#[serde(default)]
|
|
|
|
|
|
pub changes: HashMap<PathBuf, FileChange>,
|
feat: initial import of Rust implementation of Codex CLI in codex-rs/ (#629)
2025-04-24 13:31:40 -07:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-08-04 08:57:04 -07:00
|
|
|
|
pub struct TurnDiffEvent {
|
|
|
|
|
|
pub unified_diff: String,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions amending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
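The client-side lookup rule implied by `history_entry_count` can be sketched directly: absolute indices below the count resolve to offsets in the persisted log (fetched via `GetHistoryEntryRequest`), while later indices resolve into the session's in-memory local history. Names below are illustrative:

```rust
// Sketch of the split between persisted history.jsonl entries and the
// session-local history described above. `history_entry_count` is the
// number of persisted entries reported in SessionConfiguredEvent.
#[derive(Debug, PartialEq)]
enum HistorySource {
    /// Fetch from history.jsonl via GetHistoryEntryRequest.
    PersistedLog { offset: usize },
    /// Read from entries kept in memory during this session.
    Local { index: usize },
}

fn resolve(history_entry_count: usize, absolute_index: usize) -> HistorySource {
    if absolute_index < history_entry_count {
        HistorySource::PersistedLog { offset: absolute_index }
    } else {
        HistorySource::Local { index: absolute_index - history_entry_count }
    }
}

fn main() {
    // Session starts with 100 persisted entries; the user then types more.
    assert_eq!(resolve(100, 5), HistorySource::PersistedLog { offset: 5 });
    assert_eq!(resolve(100, 100), HistorySource::Local { index: 0 });
    assert_eq!(resolve(100, 101), HistorySource::Local { index: 1 });
}
```

Because the persisted log is append-only (absent truncation), reading backwards from `history_entry_count` stays consistent even while concurrent sessions append new entries.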
|
|
|
|
pub struct GetHistoryEntryResponseEvent {
|
|
|
|
|
|
pub offset: usize,
|
|
|
|
|
|
pub log_id: u64,
|
|
|
|
|
|
/// The entry at the requested offset, if available and parseable.
|
|
|
|
|
|
#[serde(skip_serializing_if = "Option::is_none")]
|
|
|
|
|
|
pub entry: Option<HistoryEntry>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-08-19 09:00:31 -07:00
|
|
|
|
pub struct McpListToolsResponseEvent {
|
|
|
|
|
|
/// Fully qualified tool name -> tool definition.
|
|
|
|
|
|
pub tools: std::collections::HashMap<String, McpTool>,
|
2025-10-16 22:05:15 -07:00
|
|
|
|
/// Known resources grouped by server name.
|
|
|
|
|
|
pub resources: std::collections::HashMap<String, Vec<McpResource>>,
|
|
|
|
|
|
/// Known resource templates grouped by server name.
|
|
|
|
|
|
pub resource_templates: std::collections::HashMap<String, Vec<McpResourceTemplate>>,
|
2025-10-08 14:37:57 -07:00
|
|
|
|
/// Authentication status for each configured MCP server.
|
|
|
|
|
|
pub auth_statuses: std::collections::HashMap<String, McpAuthStatus>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-11-17 11:26:11 -08:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct McpStartupUpdateEvent {
|
|
|
|
|
|
/// Server name being started.
|
|
|
|
|
|
pub server: String,
|
|
|
|
|
|
/// Current startup status.
|
|
|
|
|
|
pub status: McpStartupStatus,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
#[serde(rename_all = "snake_case", tag = "state")]
|
|
|
|
|
|
#[ts(rename_all = "snake_case", tag = "state")]
|
|
|
|
|
|
pub enum McpStartupStatus {
|
|
|
|
|
|
Starting,
|
|
|
|
|
|
Ready,
|
|
|
|
|
|
Failed { error: String },
|
|
|
|
|
|
Cancelled,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS, Default)]
|
|
|
|
|
|
pub struct McpStartupCompleteEvent {
|
|
|
|
|
|
pub ready: Vec<String>,
|
|
|
|
|
|
pub failed: Vec<McpStartupFailure>,
|
|
|
|
|
|
pub cancelled: Vec<String>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct McpStartupFailure {
|
|
|
|
|
|
pub server: String,
|
|
|
|
|
|
pub error: String,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, TS)]
|
2025-10-08 14:37:57 -07:00
|
|
|
|
#[serde(rename_all = "snake_case")]
|
|
|
|
|
|
#[ts(rename_all = "snake_case")]
|
|
|
|
|
|
pub enum McpAuthStatus {
|
|
|
|
|
|
Unsupported,
|
|
|
|
|
|
NotLoggedIn,
|
|
|
|
|
|
BearerToken,
|
|
|
|
|
|
OAuth,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
impl fmt::Display for McpAuthStatus {
|
|
|
|
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
|
|
|
|
let text = match self {
|
|
|
|
|
|
McpAuthStatus::Unsupported => "Unsupported",
|
|
|
|
|
|
McpAuthStatus::NotLoggedIn => "Not logged in",
|
|
|
|
|
|
McpAuthStatus::BearerToken => "Bearer token",
|
|
|
|
|
|
McpAuthStatus::OAuth => "OAuth",
|
|
|
|
|
|
};
|
|
|
|
|
|
f.write_str(text)
|
|
|
|
|
|
}
|
2025-08-19 09:00:31 -07:00
|
|
|
|
}
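The `Display` impl above maps each auth status to a human-facing label. A standalone copy of that mapping, so the labels can be exercised outside the crate (the enum and impl are replicated here verbatim for the demo):

```rust
use std::fmt;

// Standalone replica of McpAuthStatus and its Display mapping, so the
// human-facing labels can be asserted directly.
#[derive(Debug, Clone, Copy)]
enum McpAuthStatus {
    Unsupported,
    NotLoggedIn,
    BearerToken,
    OAuth,
}

impl fmt::Display for McpAuthStatus {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let text = match self {
            McpAuthStatus::Unsupported => "Unsupported",
            McpAuthStatus::NotLoggedIn => "Not logged in",
            McpAuthStatus::BearerToken => "Bearer token",
            McpAuthStatus::OAuth => "OAuth",
        };
        f.write_str(text)
    }
}

fn main() {
    assert_eq!(McpAuthStatus::NotLoggedIn.to_string(), "Not logged in");
    assert_eq!(format!("auth: {}", McpAuthStatus::OAuth), "auth: OAuth");
    assert_eq!(McpAuthStatus::BearerToken.to_string(), "Bearer token");
}
```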
|
|
|
|
|
|
|
2025-08-28 19:16:39 -07:00
|
|
|
|
/// Response payload for `Op::ListCustomPrompts`.
|
2025-10-20 11:45:11 -07:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
2025-08-28 19:16:39 -07:00
|
|
|
|
pub struct ListCustomPromptsResponseEvent {
|
|
|
|
|
|
pub custom_prompts: Vec<CustomPrompt>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-14 09:58:17 -08:00
|
|
|
|
/// Response payload for `Op::ListSkills`.
|
|
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
|
|
|
|
|
pub struct ListSkillsResponseEvent {
|
|
|
|
|
|
pub skills: Vec<SkillsListEntry>,
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, JsonSchema, TS)]
|
|
|
|
|
|
#[serde(rename_all = "snake_case")]
|
|
|
|
|
|
#[ts(rename_all = "snake_case")]
|
|
|
|
|
|
pub enum SkillScope {
|
|
|
|
|
|
User,
|
|
|
|
|
|
Repo,
|
2025-12-17 18:48:28 -08:00
|
|
|
|
System,
|
2025-12-18 18:28:56 -08:00
|
|
|
|
Admin,
|
2025-12-14 09:58:17 -08:00
|
|
|
|
}
|
|
|
|
|
|
|
2025-12-10 13:59:17 -08:00
|
|
|
|
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct SkillMetadata {
    pub name: String,
    pub description: String,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[ts(optional)]
    /// Legacy short_description from SKILL.md. Prefer SKILL.toml interface.short_description.
    pub short_description: Option<String>,
    #[serde(default, skip_serializing_if = "Option::is_none")]
    #[ts(optional)]
    pub interface: Option<SkillInterface>,
    pub path: PathBuf,
    pub scope: SkillScope,
    pub enabled: bool,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS, PartialEq, Eq)]
pub struct SkillInterface {
    #[ts(optional)]
    pub display_name: Option<String>,
    #[ts(optional)]
    pub short_description: Option<String>,
    #[ts(optional)]
    pub icon_small: Option<PathBuf>,
    #[ts(optional)]
    pub icon_large: Option<PathBuf>,
    #[ts(optional)]
    pub brand_color: Option<String>,
    #[ts(optional)]
    pub default_prompt: Option<String>,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct SkillErrorInfo {
    pub path: PathBuf,
    pub message: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct SkillsListEntry {
    pub cwd: PathBuf,
    pub skills: Vec<SkillMetadata>,
    pub errors: Vec<SkillErrorInfo>,
}

fix: add more fields to ThreadStartResponse and ThreadResumeResponse (#6847)
This adds the following fields to `ThreadStartResponse` and
`ThreadResumeResponse`:
```rust
pub model: String,
pub model_provider: String,
pub cwd: PathBuf,
pub approval_policy: AskForApproval,
pub sandbox: SandboxPolicy,
pub reasoning_effort: Option<ReasoningEffort>,
```
This is important because these fields are optional in
`ThreadStartParams` and `ThreadResumeParams`, so the caller needs to be
able to determine what values were ultimately used to start/resume the
conversation. (Though note that any of these could be changed later
between turns in the conversation.)
Though to get this information reliably, it must be read from the
internal `SessionConfiguredEvent` that is created in response to the
start of a conversation. Because `SessionConfiguredEvent` (as defined in
`codex-rs/protocol/src/protocol.rs`) did not have all of these fields, a
number of them had to be added as part of this PR.
Because `SessionConfiguredEvent` is referenced in many tests, test
instances of `SessionConfiguredEvent` had to be updated, as well, which
is why this PR touches so many files.
2025-11-18 21:18:43 -08:00
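The request/response round trip described above — optional fields in the params, resolved values echoed back — can be sketched as follows. `StartParams`, `StartResponse`, `resolve`, and the `"gpt-5"`/`"/"` defaults are hypothetical stand-ins for illustration, not the actual `ThreadStartParams`/`ThreadStartResponse` definitions:

```rust
// Illustrative sketch: optional params in, resolved values echoed back,
// so the caller always learns what was actually used.
// `StartParams`/`StartResponse`/`resolve` and the default values are
// hypothetical stand-ins, not the real app-server types.

#[derive(Default)]
struct StartParams {
    model: Option<String>,
    cwd: Option<String>,
}

struct StartResponse {
    model: String,
    cwd: String,
}

fn resolve(params: StartParams) -> StartResponse {
    // The server fills in defaults; the response reports the final values.
    StartResponse {
        model: params.model.unwrap_or_else(|| "gpt-5".to_string()),
        cwd: params.cwd.unwrap_or_else(|| "/".to_string()),
    }
}

fn main() {
    let resp = resolve(StartParams { model: None, cwd: Some("/repo".into()) });
    println!("model={} cwd={}", resp.model, resp.cwd);
}
```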
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct SessionConfiguredEvent {
    /// Name left as session_id instead of thread_id for backwards compatibility.
    pub session_id: ThreadId,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub forked_from_id: Option<ThreadId>,

    /// Tell the client what model is being queried.
    pub model: String,
feat: record messages from user in ~/.codex/history.jsonl (#939)
This is a large change to support a "history" feature like you would
expect in a shell like Bash.
History events are recorded in `$CODEX_HOME/history.jsonl`. Because it
is a JSONL file, it is straightforward to append new entries (as opposed
to the TypeScript file that uses `$CODEX_HOME/history.json`, so to be
valid JSON, each new entry entails rewriting the entire file). Because
it is possible for there to be multiple instances of Codex CLI writing
to `history.jsonl` at once, we use advisory file locking when working
with `history.jsonl` in `codex-rs/core/src/message_history.rs`.
Because we believe history is a sufficiently useful feature, we enable
it by default. Though to provide some safety, we set the file
permissions of `history.jsonl` to be `o600` so that other users on the
system cannot read the user's history. We do not yet support a default
list of `SENSITIVE_PATTERNS` as the TypeScript CLI does:
https://github.com/openai/codex/blob/3fdf9df1335ac9501e3fb0e61715359145711e8b/codex-cli/src/utils/storage/command-history.ts#L10-L17
We are going to take a more conservative approach to this list in the
Rust CLI. For example, while `/\b[A-Za-z0-9-_]{20,}\b/` might exclude
sensitive information like API tokens, it would also exclude valuable
information such as references to Git commits.
As noted in the updated documentation, users can opt-out of history by
adding the following to `config.toml`:
```toml
[history]
persistence = "none"
```
Because `history.jsonl` could, in theory, be quite large, we take a[n
arguably overly pedantic] approach in reading history entries into
memory. Specifically, we start by telling the client the current number
of entries in the history file (`history_entry_count`) as well as the
inode (`history_log_id`) of `history.jsonl` (see the new fields on
`SessionConfiguredEvent`).
The client is responsible for keeping new entries in memory to create a
"local history," but if the user hits up enough times to go "past" the
end of local history, then the client should use the new
`GetHistoryEntryRequest` in the protocol to fetch older entries.
Specifically, it should pass the `history_log_id` it was given
originally and work backwards from `history_entry_count`. (It should
really fetch history in batches rather than one-at-a-time, but that is
something we can improve upon in subsequent PRs.)
The motivation behind this crazy scheme is that it is designed to defend
against:
* The `history.jsonl` being truncated during the session such that the
index into the history is no longer consistent with what had been read
up to that point. We do not yet have logic to enforce a `max_bytes` for
`history.jsonl`, but once we do, we will aspire to implement it in a way
that should result in a new inode for the file on most systems.
* New items from concurrent Codex CLI sessions appending to the history.
Because, in absence of truncation, `history.jsonl` is an append-only
log, so long as the client reads backwards from `history_entry_count`,
it should always get a consistent view of history. (That said, it will
not be able to read _new_ commands from concurrent sessions, but perhaps
we will introduce a `/` command to reload latest history or something
down the road.)
Admittedly, my testing of this feature thus far has been fairly light. I
expect we will find bugs and introduce enhancements/fixes going forward.
2025-05-15 16:26:23 -07:00
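The backward-read scheme described above can be sketched in a few lines. `nth_line` and the demo file layout are simplified stand-ins for illustration; the real implementation also validates `history_log_id` and takes an advisory file lock before touching the log:

```rust
// Illustrative sketch of the history-read scheme: the client learns
// `history_entry_count` once, keeps new entries locally, and fetches older
// entries by absolute index when the user scrolls past local history.
use std::fs;
use std::io::Write;

/// Return the JSONL entry at `index` (0-based from the start of the log).
fn nth_line(contents: &str, index: usize) -> Option<&str> {
    contents.lines().nth(index)
}

fn main() {
    let path = std::env::temp_dir().join("history_demo.jsonl");
    let mut f = fs::File::create(&path).unwrap();
    // Append-only log: one JSON object per line, so new entries never
    // require rewriting earlier ones.
    for text in ["ls", "git status", "cargo test"] {
        writeln!(f, "{{\"text\":\"{text}\"}}").unwrap();
    }
    let log = fs::read_to_string(&path).unwrap();
    let history_entry_count = 3;
    // Pressing "up" past local history: work backwards from the count.
    println!("{}", nth_line(&log, history_entry_count - 1).unwrap());
}
```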

    pub model_provider_id: String,

    /// When to escalate for approval for execution.
    pub approval_policy: AskForApproval,

    /// How to sandbox commands executed in the system.
    pub sandbox_policy: SandboxPolicy,

    /// Working directory that should be treated as the *root* of the
    /// session.
    pub cwd: PathBuf,

    /// The effort the model is putting into reasoning about the user's request.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub reasoning_effort: Option<ReasoningEffortConfig>,

    /// Identifier of the history log file (inode on Unix, 0 otherwise).
    pub history_log_id: u64,

    /// Current number of entries in the history log.
    pub history_entry_count: usize,

    /// Optional initial messages (as events) for resumed sessions.
    /// When present, UIs can use these to seed the history.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub initial_messages: Option<Vec<EventMsg>>,

    /// Path in which the rollout is stored. Can be `None` for ephemeral threads.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub rollout_path: Option<PathBuf>,
}

/// User's decision in response to an ExecApprovalRequest.
#[derive(Debug, Default, Clone, Deserialize, Serialize, PartialEq, Eq, Display, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum ReviewDecision {
    /// User has approved this command and the agent should execute it.
    Approved,

    /// User has approved this command and wants to apply the proposed execpolicy
    /// amendment so future matching commands are permitted.
    ApprovedExecpolicyAmendment {
        proposed_execpolicy_amendment: ExecPolicyAmendment,
    },

    /// User has approved this command and wants to automatically approve any
    /// future identical instances (`command` and `cwd` match exactly) for the
    /// remainder of the session.
    ApprovedForSession,

    /// User has denied this command and the agent should not execute it, but
    /// it should continue the session and try something else.
    #[default]
    Denied,

    /// User has denied this command and the agent should not do anything until
    /// the user's next command.
    Abort,
}

impl ReviewDecision {
    /// Returns an opaque version of the decision without PII. We can't use an ignored flag
    /// on `serde` because the serialization is required by some surfaces.
    pub fn to_opaque_string(&self) -> &'static str {
        match self {
            ReviewDecision::Approved => "approved",
            ReviewDecision::ApprovedExecpolicyAmendment { .. } => "approved_with_amendment",
            ReviewDecision::ApprovedForSession => "approved_for_session",
            ReviewDecision::Denied => "denied",
            ReviewDecision::Abort => "abort",
        }
    }
}
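The opaque-label pattern above lets a telemetry layer log the decision without its payload. A minimal replica (the real type carries serde/TS derives and more variants, so `Decision`/`opaque_label` here are illustrative stand-ins):

```rust
// Minimal replica of ReviewDecision's opaque-label pattern: the amendment
// payload (which may reference user commands) is dropped, and only a fixed
// label survives for logging.
enum Decision {
    Approved,
    ApprovedExecpolicyAmendment { amendment: String },
    Denied,
}

fn opaque_label(d: &Decision) -> &'static str {
    match d {
        Decision::Approved => "approved",
        Decision::ApprovedExecpolicyAmendment { .. } => "approved_with_amendment",
        Decision::Denied => "denied",
    }
}

fn main() {
    let d = Decision::ApprovedExecpolicyAmendment {
        amendment: "allow rm -rf /tmp/cache".into(),
    };
    // The command text never reaches the log line.
    println!("decision={}", opaque_label(&d));
}
```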

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
#[serde(tag = "type", rename_all = "snake_case")]
#[ts(tag = "type")]
pub enum FileChange {
    Add {
        content: String,
    },
    Delete {
        content: String,
    },
    Update {
        unified_diff: String,
        move_path: Option<PathBuf>,
    },
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct Chunk {
    /// 1-based line index of the first line in the original file.
    pub orig_index: u32,
    pub deleted_lines: Vec<String>,
    pub inserted_lines: Vec<String>,
}

#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
pub struct TurnAbortedEvent {
    pub reason: TurnAbortReason,
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
#[serde(rename_all = "snake_case")]
pub enum TurnAbortReason {
    Interrupted,
    Replaced,
    ReviewEnded,
}

feat: emit events around collab tools (#9095)
Emit the following events around the collab tools. On the `app-server`
this will be under `item/started` and `item/completed`
```
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentSpawnBeginEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Initial prompt sent to the agent. Can be empty to prevent CoT leaking at the
/// beginning.
pub prompt: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentSpawnEndEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the newly spawned agent, if it was created.
pub new_thread_id: Option<ThreadId>,
/// Initial prompt sent to the agent. Can be empty to prevent CoT leaking at the
/// beginning.
pub prompt: String,
/// Last known status of the new agent reported to the sender agent.
pub status: AgentStatus,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentInteractionBeginEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// Prompt sent from the sender to the receiver. Can be empty to prevent CoT
/// leaking at the beginning.
pub prompt: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentInteractionEndEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// Prompt sent from the sender to the receiver. Can be empty to prevent CoT
/// leaking at the beginning.
pub prompt: String,
/// Last known status of the receiver agent reported to the sender agent.
pub status: AgentStatus,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabWaitingBeginEvent {
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// ID of the waiting call.
pub call_id: String,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabWaitingEndEvent {
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// ID of the waiting call.
pub call_id: String,
/// Last known status of the receiver agent reported to the sender agent.
pub status: AgentStatus,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabCloseBeginEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
}
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabCloseEndEvent {
/// Identifier for the collab tool call.
pub call_id: String,
/// Thread ID of the sender.
pub sender_thread_id: ThreadId,
/// Thread ID of the receiver.
pub receiver_thread_id: ThreadId,
/// Last known status of the receiver agent reported to the sender agent before
/// the close.
pub status: AgentStatus,
}
```
2026-01-14 17:55:57 +00:00
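A consumer can pair these Begin/End events by `call_id` to render an item's lifecycle (e.g. `item/started` → `item/completed`). A sketch with trimmed stand-in event shapes (`SpawnBegin`/`SpawnEnd`/`pair_events` are illustrative, not the real structs):

```rust
// Sketch of pairing Begin/End collab events by call_id. Event shapes are
// trimmed, hypothetical stand-ins for the real structs above.
use std::collections::HashMap;

struct SpawnBegin { call_id: String, prompt: String }
struct SpawnEnd { call_id: String, status: String }

/// Join each End event with its matching Begin by call_id, yielding
/// (call_id, prompt, final status) triples.
fn pair_events(begins: Vec<SpawnBegin>, ends: Vec<SpawnEnd>) -> Vec<(String, String, String)> {
    let mut by_id: HashMap<String, String> = begins
        .into_iter()
        .map(|b| (b.call_id, b.prompt))
        .collect();
    ends.into_iter()
        .filter_map(|e| by_id.remove(&e.call_id).map(|prompt| (e.call_id, prompt, e.status)))
        .collect()
}

fn main() {
    let begins = vec![SpawnBegin { call_id: "c1".into(), prompt: "review PR".into() }];
    let ends = vec![SpawnEnd { call_id: "c1".into(), status: "completed".into() }];
    for (id, prompt, status) in pair_events(begins, ends) {
        println!("{id}: {prompt} -> {status}");
    }
}
```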
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentSpawnBeginEvent {
    /// Identifier for the collab tool call.
    pub call_id: String,
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// Initial prompt sent to the agent. Can be empty to prevent CoT leaking at the
    /// beginning.
    pub prompt: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentSpawnEndEvent {
    /// Identifier for the collab tool call.
    pub call_id: String,
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// Thread ID of the newly spawned agent, if it was created.
    pub new_thread_id: Option<ThreadId>,
    /// Initial prompt sent to the agent. Can be empty to prevent CoT leaking at the
    /// beginning.
    pub prompt: String,
    /// Last known status of the new agent reported to the sender agent.
    pub status: AgentStatus,
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentInteractionBeginEvent {
    /// Identifier for the collab tool call.
    pub call_id: String,
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// Thread ID of the receiver.
    pub receiver_thread_id: ThreadId,
    /// Prompt sent from the sender to the receiver. Can be empty to prevent CoT
    /// leaking at the beginning.
    pub prompt: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabAgentInteractionEndEvent {
    /// Identifier for the collab tool call.
    pub call_id: String,
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// Thread ID of the receiver.
    pub receiver_thread_id: ThreadId,
    /// Prompt sent from the sender to the receiver. Can be empty to prevent CoT
    /// leaking at the beginning.
    pub prompt: String,
    /// Last known status of the receiver agent reported to the sender agent.
    pub status: AgentStatus,
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabWaitingBeginEvent {
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// Thread ID of the receivers.
    pub receiver_thread_ids: Vec<ThreadId>,
    /// ID of the waiting call.
    pub call_id: String,
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabWaitingEndEvent {
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// ID of the waiting call.
    pub call_id: String,
    /// Last known statuses of the receiver agents reported to the sender agent.
    pub statuses: HashMap<ThreadId, AgentStatus>,
}
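The waiting begin/end pair above differs from the other collab events: the begin event lists every receiver being waited on, while the end event reports one last-known status per receiver. A dependency-free sketch of that shape, using stand-in `ThreadId` and `AgentStatus` types (the real ones are richer types defined elsewhere in this crate, and `end_event_statuses` is a name invented for this illustration):

```rust
use std::collections::HashMap;

// Stand-in types for illustration only.
type ThreadId = u64;

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AgentStatus {
    Completed,
}

/// Build the per-receiver status map a CollabWaitingEndEvent would carry,
/// given the receiver ids a CollabWaitingBeginEvent announced.
fn end_event_statuses(receiver_thread_ids: &[ThreadId]) -> HashMap<ThreadId, AgentStatus> {
    receiver_thread_ids
        .iter()
        .map(|&id| (id, AgentStatus::Completed)) // assume every receiver finished
        .collect()
}

fn main() {
    let statuses = end_event_statuses(&[7, 8]);
    assert_eq!(statuses.len(), 2);
    assert_eq!(statuses[&7], AgentStatus::Completed);
    println!("ok");
}
```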
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabCloseBeginEvent {
    /// Identifier for the collab tool call.
    pub call_id: String,
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// Thread ID of the receiver.
    pub receiver_thread_id: ThreadId,
}

#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, JsonSchema, TS)]
pub struct CollabCloseEndEvent {
    /// Identifier for the collab tool call.
    pub call_id: String,
    /// Thread ID of the sender.
    pub sender_thread_id: ThreadId,
    /// Thread ID of the receiver.
    pub receiver_thread_id: ThreadId,
    /// Last known status of the receiver agent reported to the sender agent before
    /// the close.
    pub status: AgentStatus,
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::items::UserMessageItem;
    use crate::items::WebSearchItem;
    use anyhow::Result;
    use pretty_assertions::assert_eq;
    use serde_json::json;
    use tempfile::NamedTempFile;
    #[test]
    fn external_sandbox_reports_full_access_flags() {
        let restricted = SandboxPolicy::ExternalSandbox {
            network_access: NetworkAccess::Restricted,
        };
        assert!(restricted.has_full_disk_write_access());
        assert!(!restricted.has_full_network_access());

        let enabled = SandboxPolicy::ExternalSandbox {
            network_access: NetworkAccess::Enabled,
        };
        assert!(enabled.has_full_disk_write_access());
        assert!(enabled.has_full_network_access());
    }
    #[test]
    fn item_started_event_from_web_search_emits_begin_event() {
        let event = ItemStartedEvent {
            thread_id: ThreadId::new(),
            turn_id: "turn-1".into(),
            item: TurnItem::WebSearch(WebSearchItem {
                id: "search-1".into(),
                query: "find docs".into(),
                action: WebSearchAction::Search {
                    query: Some("find docs".into()),
                },
            }),
        };

        let legacy_events = event.as_legacy_events(false);
        assert_eq!(legacy_events.len(), 1);
        match &legacy_events[0] {
            EventMsg::WebSearchBegin(event) => assert_eq!(event.call_id, "search-1"),
            _ => panic!("expected WebSearchBegin event"),
        }
    }
    #[test]
    fn item_started_event_from_non_web_search_emits_no_legacy_events() {
        let event = ItemStartedEvent {
            thread_id: ThreadId::new(),
            turn_id: "turn-1".into(),
            item: TurnItem::UserMessage(UserMessageItem::new(&[])),
        };

        assert!(event.as_legacy_events(false).is_empty());
    }
    #[test]
    fn user_input_serialization_omits_final_output_json_schema_when_none() -> Result<()> {
        let op = Op::UserInput {
            items: Vec::new(),
            final_output_json_schema: None,
        };

        let json_op = serde_json::to_value(op)?;
        assert_eq!(json_op, json!({ "type": "user_input", "items": [] }));

        Ok(())
    }
    #[test]
    fn user_input_deserializes_without_final_output_json_schema_field() -> Result<()> {
        let op: Op = serde_json::from_value(json!({ "type": "user_input", "items": [] }))?;

        assert_eq!(
            op,
            Op::UserInput {
                items: Vec::new(),
                final_output_json_schema: None,
            }
        );

        Ok(())
    }
    #[test]
    fn user_input_serialization_includes_final_output_json_schema_when_some() -> Result<()> {
        let schema = json!({
            "type": "object",
            "properties": {
                "answer": { "type": "string" }
            },
            "required": ["answer"],
            "additionalProperties": false
        });
        let op = Op::UserInput {
            items: Vec::new(),
            final_output_json_schema: Some(schema.clone()),
        };

        let json_op = serde_json::to_value(op)?;
        assert_eq!(
            json_op,
            json!({
                "type": "user_input",
                "items": [],
                "final_output_json_schema": schema,
            })
        );

        Ok(())
    }
    #[test]
    fn user_input_text_serializes_empty_text_elements() -> Result<()> {
        let input = UserInput::Text {
            text: "hello".to_string(),
            text_elements: Vec::new(),
        };

        let json_input = serde_json::to_value(input)?;
        assert_eq!(
            json_input,
            json!({
                "type": "text",
                "text": "hello",
                "text_elements": [],
            })
        );

        Ok(())
    }
    #[test]
    fn user_message_event_serializes_empty_metadata_vectors() -> Result<()> {
        let event = UserMessageEvent {
            message: "hello".to_string(),
            images: None,
            local_images: Vec::new(),
            text_elements: Vec::new(),
        };

        let json_event = serde_json::to_value(event)?;
        assert_eq!(
            json_event,
            json!({
                "message": "hello",
                "local_images": [],
                "text_elements": [],
            })
        );

        Ok(())
    }
    /// Serialize Event to verify that its JSON representation has the expected
    /// amount of nesting.
    #[test]
    fn serialize_event() -> Result<()> {
        let conversation_id = ThreadId::from_string("67e55044-10b1-426f-9247-bb680e5fe0c8")?;
        let rollout_file = NamedTempFile::new()?;
        let event = Event {
            id: "1234".to_string(),
            msg: EventMsg::SessionConfigured(SessionConfiguredEvent {
                session_id: conversation_id,
                forked_from_id: None,
                model: "codex-mini-latest".to_string(),
                model_provider_id: "openai".to_string(),
                approval_policy: AskForApproval::Never,
                sandbox_policy: SandboxPolicy::ReadOnly,
                cwd: PathBuf::from("/home/user/project"),
                reasoning_effort: Some(ReasoningEffortConfig::default()),
                history_log_id: 0,
                history_entry_count: 0,
                initial_messages: None,
                rollout_path: Some(rollout_file.path().to_path_buf()),
            }),
        };

        let expected = json!({
            "id": "1234",
            "msg": {
                "type": "session_configured",
                "session_id": "67e55044-10b1-426f-9247-bb680e5fe0c8",
                "model": "codex-mini-latest",
                "model_provider_id": "openai",
                "approval_policy": "never",
                "sandbox_policy": {
                    "type": "read-only"
                },
                "cwd": "/home/user/project",
                "reasoning_effort": "medium",
                "history_log_id": 0,
                "history_entry_count": 0,
                "rollout_path": format!("{}", rollout_file.path().display()),
            }
        });
        assert_eq!(expected, serde_json::to_value(&event)?);
        Ok(())
    }
    #[test]
    fn vec_u8_as_base64_serialization_and_deserialization() -> Result<()> {
        let event = ExecCommandOutputDeltaEvent {
            call_id: "call21".to_string(),
            stream: ExecOutputStream::Stdout,
            chunk: vec![1, 2, 3, 4, 5],
        };
        let serialized = serde_json::to_string(&event)?;
        assert_eq!(
            r#"{"call_id":"call21","stream":"stdout","chunk":"AQIDBAU="}"#,
            serialized,
        );

        let deserialized: ExecCommandOutputDeltaEvent = serde_json::from_str(&serialized)?;
        assert_eq!(deserialized, event);
        Ok(())
    }
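The `chunk` bytes in the test above serialize as standard base64. As a cross-check on the expected `"AQIDBAU="` string, here is a dependency-free sketch of the same encoding; `base64_encode` is a name invented for this illustration, not an API of this crate:

```rust
/// Minimal RFC 4648 base64 encoder (standard alphabet, '=' padding).
fn base64_encode(bytes: &[u8]) -> String {
    const ALPHABET: &[u8; 64] =
        b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    let mut out = String::new();
    for chunk in bytes.chunks(3) {
        // Pack up to three input bytes into a 24-bit group (missing bytes are zero).
        let n = (u32::from(chunk[0]) << 16)
            | (u32::from(*chunk.get(1).unwrap_or(&0)) << 8)
            | u32::from(*chunk.get(2).unwrap_or(&0));
        // Emit four 6-bit symbols; positions with no input byte become '='.
        for i in 0..4 {
            if i <= chunk.len() {
                let sym = (n >> (18 - 6 * i)) & 0x3f;
                out.push(ALPHABET[sym as usize] as char);
            } else {
                out.push('=');
            }
        }
    }
    out
}

fn main() {
    // The same bytes as the serde test: [1, 2, 3, 4, 5].
    assert_eq!(base64_encode(&[1, 2, 3, 4, 5]), "AQIDBAU=");
    println!("{}", base64_encode(&[1, 2, 3, 4, 5]));
}
```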
    #[test]
    fn serialize_mcp_startup_update_event() -> Result<()> {
        let event = Event {
            id: "init".to_string(),
            msg: EventMsg::McpStartupUpdate(McpStartupUpdateEvent {
                server: "srv".to_string(),
                status: McpStartupStatus::Failed {
                    error: "boom".to_string(),
                },
            }),
        };

        let value = serde_json::to_value(&event)?;
        assert_eq!(value["msg"]["type"], "mcp_startup_update");
        assert_eq!(value["msg"]["server"], "srv");
        assert_eq!(value["msg"]["status"]["state"], "failed");
        assert_eq!(value["msg"]["status"]["error"], "boom");
        Ok(())
    }
    #[test]
    fn serialize_mcp_startup_complete_event() -> Result<()> {
        let event = Event {
            id: "init".to_string(),
            msg: EventMsg::McpStartupComplete(McpStartupCompleteEvent {
                ready: vec!["a".to_string()],
                failed: vec![McpStartupFailure {
                    server: "b".to_string(),
                    error: "bad".to_string(),
                }],
                cancelled: vec!["c".to_string()],
            }),
        };

        let value = serde_json::to_value(&event)?;
        assert_eq!(value["msg"]["type"], "mcp_startup_complete");
        assert_eq!(value["msg"]["ready"][0], "a");
        assert_eq!(value["msg"]["failed"][0]["server"], "b");
        assert_eq!(value["msg"]["failed"][0]["error"], "bad");
        assert_eq!(value["msg"]["cancelled"][0], "c");
        Ok(())
    }
}