core-agent-ide/codex-rs/app-server-test-client/src/lib.rs
Michael Bolin b77fe8fefe
Apply argument comment lint across codex-rs (#14652)
## Why

Once the repo-local lint exists, `codex-rs` needs to follow the
checked-in convention and CI needs to keep it from drifting. This commit
applies the fallback `/*param*/` style consistently across existing
positional literal call sites without changing those APIs.

The longer-term preference is still to avoid APIs that require comments
by choosing clearer parameter types and call shapes. This PR is
intentionally the mechanical follow-through for the places where the
existing signatures stay in place.

After rebasing onto newer `main`, the rollout also had to cover newly
introduced `tui_app_server` call sites. That made it clear the first cut
of the CI job was too expensive for the common path: it was spending
almost as much time installing `cargo-dylint` and re-testing the lint
crate as a representative test job spends running product tests. The CI
update keeps the full workspace enforcement but trims that extra
overhead from ordinary `codex-rs` PRs.

## What changed

- keep a dedicated `argument_comment_lint` job in `rust-ci`
- mechanically annotate remaining opaque positional literals across
`codex-rs` with exact `/*param*/` comments, including the rebased
`tui_app_server` call sites that now fall under the lint
- keep the checked-in style aligned with the lint policy by using
`/*param*/` and leaving string and char literals uncommented
- cache `cargo-dylint`, `dylint-link`, and the relevant Cargo
registry/git metadata in the lint job
- split changed-path detection so the lint crate's own `cargo test` step
runs only when `tools/argument-comment-lint/*` or `rust-ci.yml` changes
- continue to run the repo wrapper over the `codex-rs` workspace, so
product-code enforcement is unchanged

Most of the code changes in this commit are intentionally mechanical
comment rewrites or insertions driven by the lint itself.

## Verification

- `./tools/argument-comment-lint/run.sh --workspace`
- `cargo test -p codex-tui-app-server -p codex-tui`
- parsed `.github/workflows/rust-ci.yml` locally with PyYAML

---

* -> #14652
* #14651
2026-03-16 16:48:15 -07:00

2197 lines
78 KiB
Rust

use std::collections::VecDeque;
use std::ffi::OsString;
use std::fs;
use std::fs::OpenOptions;
use std::io::BufRead;
use std::io::BufReader;
use std::io::Write;
use std::net::TcpListener;
use std::net::TcpStream;
use std::path::Path;
use std::path::PathBuf;
use std::process::Child;
use std::process::ChildStdin;
use std::process::ChildStdout;
use std::process::Command;
use std::process::Stdio;
use std::thread;
use std::time::Duration;
use std::time::Instant;
use std::time::SystemTime;
use anyhow::Context;
use anyhow::Result;
use anyhow::bail;
use clap::ArgAction;
use clap::Parser;
use clap::Subcommand;
use codex_app_server_protocol::AccountLoginCompletedNotification;
use codex_app_server_protocol::AskForApproval;
use codex_app_server_protocol::ClientInfo;
use codex_app_server_protocol::ClientRequest;
use codex_app_server_protocol::CommandExecutionApprovalDecision;
use codex_app_server_protocol::CommandExecutionRequestApprovalParams;
use codex_app_server_protocol::CommandExecutionRequestApprovalResponse;
use codex_app_server_protocol::CommandExecutionStatus;
use codex_app_server_protocol::DynamicToolSpec;
use codex_app_server_protocol::FileChangeApprovalDecision;
use codex_app_server_protocol::FileChangeRequestApprovalParams;
use codex_app_server_protocol::FileChangeRequestApprovalResponse;
use codex_app_server_protocol::GetAccountRateLimitsResponse;
use codex_app_server_protocol::InitializeCapabilities;
use codex_app_server_protocol::InitializeParams;
use codex_app_server_protocol::InitializeResponse;
use codex_app_server_protocol::JSONRPCMessage;
use codex_app_server_protocol::JSONRPCNotification;
use codex_app_server_protocol::JSONRPCRequest;
use codex_app_server_protocol::JSONRPCResponse;
use codex_app_server_protocol::LoginAccountResponse;
use codex_app_server_protocol::ModelListParams;
use codex_app_server_protocol::ModelListResponse;
use codex_app_server_protocol::ReadOnlyAccess;
use codex_app_server_protocol::RequestId;
use codex_app_server_protocol::SandboxPolicy;
use codex_app_server_protocol::ServerNotification;
use codex_app_server_protocol::ServerRequest;
use codex_app_server_protocol::ThreadDecrementElicitationParams;
use codex_app_server_protocol::ThreadDecrementElicitationResponse;
use codex_app_server_protocol::ThreadIncrementElicitationParams;
use codex_app_server_protocol::ThreadIncrementElicitationResponse;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadListParams;
use codex_app_server_protocol::ThreadListResponse;
use codex_app_server_protocol::ThreadResumeParams;
use codex_app_server_protocol::ThreadResumeResponse;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
use codex_app_server_protocol::TurnStartParams;
use codex_app_server_protocol::TurnStartResponse;
use codex_app_server_protocol::TurnStatus;
use codex_app_server_protocol::UserInput as V2UserInput;
use codex_core::config::Config;
use codex_otel::OtelProvider;
use codex_otel::current_span_w3c_trace_context;
use codex_protocol::openai_models::ReasoningEffort;
use codex_protocol::protocol::W3cTraceContext;
use codex_utils_cli::CliConfigOverrides;
use serde::Serialize;
use serde::de::DeserializeOwned;
use serde_json::Value;
use tracing::info_span;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tungstenite::Message;
use tungstenite::WebSocket;
use tungstenite::connect;
use tungstenite::stream::MaybeTlsStream;
use url::Url;
use uuid::Uuid;
const NOTIFICATIONS_TO_OPT_OUT: &[&str] = &[
// v2 item deltas.
"command/exec/outputDelta",
"item/agentMessage/delta",
"item/plan/delta",
"item/fileChange/outputDelta",
"item/reasoning/summaryTextDelta",
"item/reasoning/textDelta",
];
const APP_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = Duration::from_secs(5);
const APP_SERVER_GRACEFUL_SHUTDOWN_POLL_INTERVAL: Duration = Duration::from_millis(100);
const DEFAULT_ANALYTICS_ENABLED: bool = true;
const OTEL_SERVICE_NAME: &str = "codex-app-server-test-client";
const TRACE_DISABLED_MESSAGE: &str =
"Not enabled - enable tracing in $CODEX_HOME/config.toml to get a trace URL!";
/// Minimal launcher that initializes the Codex app-server and logs the handshake.
#[derive(Parser)]
#[command(author = "Codex", version, about = "Bootstrap Codex app-server", long_about = None)]
struct Cli {
/// Path to the `codex` CLI binary. When set, requests use stdio by
/// spawning `codex app-server` as a child process.
#[arg(long, env = "CODEX_BIN", global = true)]
codex_bin: Option<PathBuf>,
/// Existing websocket server URL to connect to.
///
/// If neither `--codex-bin` nor `--url` is provided, defaults to
/// `ws://127.0.0.1:4222`.
#[arg(long, env = "CODEX_APP_SERVER_URL", global = true)]
url: Option<String>,
/// Forwarded to the `codex` CLI as `--config key=value`. Repeatable.
///
/// Example:
/// `--config 'model_providers.mock.base_url="http://localhost:4010/v2"'`
#[arg(
short = 'c',
long = "config",
value_name = "key=value",
action = ArgAction::Append,
global = true
)]
config_overrides: Vec<String>,
/// JSON array of dynamic tool specs or a single tool object.
/// Prefix a filename with '@' to read from a file.
///
/// Example:
/// --dynamic-tools '[{"name":"demo","description":"Demo","inputSchema":{"type":"object"}}]'
/// --dynamic-tools @/path/to/tools.json
#[arg(long, value_name = "json-or-@file", global = true)]
dynamic_tools: Option<String>,
#[command(subcommand)]
command: CliCommand,
}
#[derive(Subcommand)]
enum CliCommand {
/// Start `codex app-server` on a websocket endpoint in the background.
///
/// Logs are written to:
/// `/tmp/codex-app-server-test-client/`
Serve {
/// WebSocket listen URL passed to `codex app-server --listen`.
#[arg(long, default_value = "ws://127.0.0.1:4222")]
listen: String,
/// Kill any process listening on the same port before starting.
#[arg(long, default_value_t = false)]
kill: bool,
},
/// Send a user message through the Codex app-server.
SendMessage {
/// User message to send to Codex.
user_message: String,
},
/// Send a user message through the app-server V2 thread/turn APIs.
SendMessageV2 {
/// Opt into experimental app-server methods and fields.
#[arg(long)]
experimental_api: bool,
/// User message to send to Codex.
user_message: String,
},
/// Resume a V2 thread by id, then send a user message.
ResumeMessageV2 {
/// Existing thread id to resume.
thread_id: String,
/// User message to send to Codex.
user_message: String,
},
/// Resume a V2 thread and continuously stream notifications/events.
///
/// This command does not auto-exit; stop it with SIGINT/SIGTERM/SIGKILL.
ThreadResume {
/// Existing thread id to resume.
thread_id: String,
},
/// Initialize the app-server and dump all inbound messages until interrupted.
///
/// This command does not auto-exit; stop it with SIGINT/SIGTERM/SIGKILL.
Watch,
/// Start a V2 turn that elicits an ExecCommand approval.
#[command(name = "trigger-cmd-approval")]
TriggerCmdApproval {
/// Optional prompt; defaults to a simple python command.
user_message: Option<String>,
},
/// Start a V2 turn that elicits an ApplyPatch approval.
#[command(name = "trigger-patch-approval")]
TriggerPatchApproval {
/// Optional prompt; defaults to creating a file via apply_patch.
user_message: Option<String>,
},
/// Start a V2 turn that should not elicit an ExecCommand approval.
#[command(name = "no-trigger-cmd-approval")]
NoTriggerCmdApproval,
/// Send two sequential V2 turns in the same thread to test follow-up behavior.
SendFollowUpV2 {
/// Initial user message for the first turn.
first_message: String,
/// Follow-up user message for the second turn.
follow_up_message: String,
},
/// Trigger zsh-fork multi-subcommand approvals and assert expected approval behavior.
#[command(name = "trigger-zsh-fork-multi-cmd-approval")]
TriggerZshForkMultiCmdApproval {
/// Optional prompt; defaults to an explicit `/usr/bin/true && /usr/bin/true` command.
user_message: Option<String>,
/// Minimum number of command-approval callbacks expected in the turn.
#[arg(long, default_value_t = 2)]
min_approvals: usize,
/// One-based approval index to abort (e.g. --abort-on 2 aborts the second approval).
#[arg(long)]
abort_on: Option<usize>,
},
/// Trigger the ChatGPT login flow and wait for completion.
TestLogin,
/// Fetch the current account rate limits from the Codex app-server.
GetAccountRateLimits,
/// List the available models from the Codex app-server.
#[command(name = "model-list")]
ModelList,
/// List stored threads from the Codex app-server.
#[command(name = "thread-list")]
ThreadList {
/// Number of threads to return.
#[arg(long, default_value_t = 20)]
limit: u32,
},
/// Increment the out-of-band elicitation pause counter for a thread.
#[command(name = "thread-increment-elicitation")]
ThreadIncrementElicitation {
/// Existing thread id to update.
thread_id: String,
},
/// Decrement the out-of-band elicitation pause counter for a thread.
#[command(name = "thread-decrement-elicitation")]
ThreadDecrementElicitation {
/// Existing thread id to update.
thread_id: String,
},
/// Run the live websocket harness that proves elicitation pause prevents a
/// 10s unified exec timeout from killing a 15s helper script.
#[command(name = "live-elicitation-timeout-pause")]
LiveElicitationTimeoutPause {
/// Model passed to `thread/start`.
#[arg(long, env = "CODEX_E2E_MODEL", default_value = "gpt-5")]
model: String,
/// Existing workspace path used as the turn cwd.
#[arg(long, value_name = "path", default_value = ".")]
workspace: PathBuf,
/// Helper script to run from the model; defaults to the repo-local
/// live elicitation hold script.
#[arg(long, value_name = "path")]
script: Option<PathBuf>,
/// Seconds the helper script should sleep while the timeout is paused.
#[arg(long, default_value_t = 15)]
hold_seconds: u64,
},
}
pub async fn run() -> Result<()> {
let Cli {
codex_bin,
url,
config_overrides,
dynamic_tools,
command,
} = Cli::parse();
let dynamic_tools = parse_dynamic_tools_arg(&dynamic_tools)?;
match command {
CliCommand::Serve { listen, kill } => {
ensure_dynamic_tools_unused(&dynamic_tools, "serve")?;
let codex_bin = codex_bin.unwrap_or_else(|| PathBuf::from("codex"));
serve(&codex_bin, &config_overrides, &listen, kill)
}
CliCommand::SendMessage { user_message } => {
ensure_dynamic_tools_unused(&dynamic_tools, "send-message")?;
let endpoint = resolve_endpoint(codex_bin, url)?;
send_message(&endpoint, &config_overrides, user_message).await
}
CliCommand::SendMessageV2 {
experimental_api,
user_message,
} => {
let endpoint = resolve_endpoint(codex_bin, url)?;
send_message_v2_endpoint(
&endpoint,
&config_overrides,
user_message,
experimental_api,
&dynamic_tools,
)
.await
}
CliCommand::ResumeMessageV2 {
thread_id,
user_message,
} => {
let endpoint = resolve_endpoint(codex_bin, url)?;
resume_message_v2(
&endpoint,
&config_overrides,
thread_id,
user_message,
&dynamic_tools,
)
.await
}
CliCommand::ThreadResume { thread_id } => {
ensure_dynamic_tools_unused(&dynamic_tools, "thread-resume")?;
let endpoint = resolve_endpoint(codex_bin, url)?;
thread_resume_follow(&endpoint, &config_overrides, thread_id).await
}
CliCommand::Watch => {
ensure_dynamic_tools_unused(&dynamic_tools, "watch")?;
let endpoint = resolve_endpoint(codex_bin, url)?;
watch(&endpoint, &config_overrides).await
}
CliCommand::TriggerCmdApproval { user_message } => {
let endpoint = resolve_endpoint(codex_bin, url)?;
trigger_cmd_approval(&endpoint, &config_overrides, user_message, &dynamic_tools).await
}
CliCommand::TriggerPatchApproval { user_message } => {
let endpoint = resolve_endpoint(codex_bin, url)?;
trigger_patch_approval(&endpoint, &config_overrides, user_message, &dynamic_tools).await
}
CliCommand::NoTriggerCmdApproval => {
let endpoint = resolve_endpoint(codex_bin, url)?;
no_trigger_cmd_approval(&endpoint, &config_overrides, &dynamic_tools).await
}
CliCommand::SendFollowUpV2 {
first_message,
follow_up_message,
} => {
let endpoint = resolve_endpoint(codex_bin, url)?;
send_follow_up_v2(
&endpoint,
&config_overrides,
first_message,
follow_up_message,
&dynamic_tools,
)
.await
}
CliCommand::TriggerZshForkMultiCmdApproval {
user_message,
min_approvals,
abort_on,
} => {
let endpoint = resolve_endpoint(codex_bin, url)?;
trigger_zsh_fork_multi_cmd_approval(
&endpoint,
&config_overrides,
user_message,
min_approvals,
abort_on,
&dynamic_tools,
)
.await
}
CliCommand::TestLogin => {
ensure_dynamic_tools_unused(&dynamic_tools, "test-login")?;
let endpoint = resolve_endpoint(codex_bin, url)?;
test_login(&endpoint, &config_overrides).await
}
CliCommand::GetAccountRateLimits => {
ensure_dynamic_tools_unused(&dynamic_tools, "get-account-rate-limits")?;
let endpoint = resolve_endpoint(codex_bin, url)?;
get_account_rate_limits(&endpoint, &config_overrides).await
}
CliCommand::ModelList => {
ensure_dynamic_tools_unused(&dynamic_tools, "model-list")?;
let endpoint = resolve_endpoint(codex_bin, url)?;
model_list(&endpoint, &config_overrides).await
}
CliCommand::ThreadList { limit } => {
ensure_dynamic_tools_unused(&dynamic_tools, "thread-list")?;
let endpoint = resolve_endpoint(codex_bin, url)?;
thread_list(&endpoint, &config_overrides, limit).await
}
CliCommand::ThreadIncrementElicitation { thread_id } => {
ensure_dynamic_tools_unused(&dynamic_tools, "thread-increment-elicitation")?;
let url = resolve_shared_websocket_url(codex_bin, url, "thread-increment-elicitation")?;
thread_increment_elicitation(&url, thread_id)
}
CliCommand::ThreadDecrementElicitation { thread_id } => {
ensure_dynamic_tools_unused(&dynamic_tools, "thread-decrement-elicitation")?;
let url = resolve_shared_websocket_url(codex_bin, url, "thread-decrement-elicitation")?;
thread_decrement_elicitation(&url, thread_id)
}
CliCommand::LiveElicitationTimeoutPause {
model,
workspace,
script,
hold_seconds,
} => {
ensure_dynamic_tools_unused(&dynamic_tools, "live-elicitation-timeout-pause")?;
live_elicitation_timeout_pause(
codex_bin,
url,
&config_overrides,
model,
workspace,
script,
hold_seconds,
)
}
}
}
enum Endpoint {
SpawnCodex(PathBuf),
ConnectWs(String),
}
struct BackgroundAppServer {
process: Child,
url: String,
}
fn resolve_endpoint(codex_bin: Option<PathBuf>, url: Option<String>) -> Result<Endpoint> {
if codex_bin.is_some() && url.is_some() {
bail!("--codex-bin and --url are mutually exclusive");
}
if let Some(codex_bin) = codex_bin {
return Ok(Endpoint::SpawnCodex(codex_bin));
}
if let Some(url) = url {
return Ok(Endpoint::ConnectWs(url));
}
Ok(Endpoint::ConnectWs("ws://127.0.0.1:4222".to_string()))
}
fn resolve_shared_websocket_url(
codex_bin: Option<PathBuf>,
url: Option<String>,
command: &str,
) -> Result<String> {
if codex_bin.is_some() {
bail!(
"{command} requires --url or an already-running websocket app-server; --codex-bin would spawn a private stdio app-server instead"
);
}
Ok(url.unwrap_or_else(|| "ws://127.0.0.1:4222".to_string()))
}
impl BackgroundAppServer {
fn spawn(codex_bin: &Path, config_overrides: &[String]) -> Result<Self> {
let listener = TcpListener::bind("127.0.0.1:0")
.context("failed to reserve a local port for websocket app-server")?;
let addr = listener.local_addr()?;
drop(listener);
let url = format!("ws://{addr}");
let mut cmd = Command::new(codex_bin);
if let Some(codex_bin_parent) = codex_bin.parent() {
let mut path = OsString::from(codex_bin_parent.as_os_str());
if let Some(existing_path) = std::env::var_os("PATH") {
path.push(":");
path.push(existing_path);
}
cmd.env("PATH", path);
}
for override_kv in config_overrides {
cmd.arg("--config").arg(override_kv);
}
let process = cmd
.arg("app-server")
.arg("--listen")
.arg(&url)
.stdin(Stdio::null())
.stdout(Stdio::null())
.stderr(Stdio::inherit())
.spawn()
.with_context(|| format!("failed to start `{}` app-server", codex_bin.display()))?;
Ok(Self { process, url })
}
}
impl Drop for BackgroundAppServer {
fn drop(&mut self) {
if let Ok(Some(status)) = self.process.try_wait() {
println!("[background app-server exited: {status}]");
return;
}
let _ = self.process.kill();
let _ = self.process.wait();
}
}
fn serve(codex_bin: &Path, config_overrides: &[String], listen: &str, kill: bool) -> Result<()> {
let runtime_dir = PathBuf::from("/tmp/codex-app-server-test-client");
fs::create_dir_all(&runtime_dir)
.with_context(|| format!("failed to create runtime dir {}", runtime_dir.display()))?;
let log_path = runtime_dir.join("app-server.log");
if kill {
kill_listeners_on_same_port(listen)?;
}
let log_file = OpenOptions::new()
.create(true)
.append(true)
.open(&log_path)
.with_context(|| format!("failed to open log file {}", log_path.display()))?;
let log_file_stderr = log_file
.try_clone()
.with_context(|| format!("failed to clone log file handle {}", log_path.display()))?;
let mut cmdline = format!(
"tail -f /dev/null | RUST_BACKTRACE=full RUST_LOG=warn,codex_=trace {}",
shell_quote(&codex_bin.display().to_string())
);
for override_kv in config_overrides {
cmdline.push_str(&format!(" --config {}", shell_quote(override_kv)));
}
cmdline.push_str(&format!(" app-server --listen {}", shell_quote(listen)));
let child = Command::new("nohup")
.arg("sh")
.arg("-c")
.arg(cmdline)
.stdin(Stdio::null())
.stdout(Stdio::from(log_file))
.stderr(Stdio::from(log_file_stderr))
.spawn()
.with_context(|| format!("failed to start `{}` app-server", codex_bin.display()))?;
let pid = child.id();
println!("started codex app-server");
println!("listen: {listen}");
println!("pid: {pid} (launcher process)");
println!("log: {}", log_path.display());
Ok(())
}
fn kill_listeners_on_same_port(listen: &str) -> Result<()> {
let url = Url::parse(listen).with_context(|| format!("invalid --listen URL `{listen}`"))?;
let port = url
.port_or_known_default()
.with_context(|| format!("unable to infer port from --listen URL `{listen}`"))?;
let output = Command::new("lsof")
.arg("-nP")
.arg(format!("-tiTCP:{port}"))
.arg("-sTCP:LISTEN")
.output()
.with_context(|| format!("failed to run lsof for port {port}"))?;
if !output.status.success() {
return Ok(());
}
let pids: Vec<u32> = String::from_utf8_lossy(&output.stdout)
.lines()
.filter_map(|line| line.trim().parse::<u32>().ok())
.collect();
if pids.is_empty() {
return Ok(());
}
for pid in pids {
println!("killing listener pid {pid} on port {port}");
let pid_str = pid.to_string();
let term_status = Command::new("kill")
.arg(&pid_str)
.status()
.with_context(|| format!("failed to send SIGTERM to pid {pid}"))?;
if !term_status.success() {
continue;
}
}
thread::sleep(Duration::from_millis(300));
let output = Command::new("lsof")
.arg("-nP")
.arg(format!("-tiTCP:{port}"))
.arg("-sTCP:LISTEN")
.output()
.with_context(|| format!("failed to re-check listeners on port {port}"))?;
if !output.status.success() {
return Ok(());
}
let remaining: Vec<u32> = String::from_utf8_lossy(&output.stdout)
.lines()
.filter_map(|line| line.trim().parse::<u32>().ok())
.collect();
for pid in remaining {
println!("force killing remaining listener pid {pid} on port {port}");
let _ = Command::new("kill").arg("-9").arg(pid.to_string()).status();
}
Ok(())
}
fn shell_quote(input: &str) -> String {
format!("'{}'", input.replace('\'', "'\\''"))
}
struct SendMessagePolicies<'a> {
command_name: &'static str,
experimental_api: bool,
approval_policy: Option<AskForApproval>,
sandbox_policy: Option<SandboxPolicy>,
dynamic_tools: &'a Option<Vec<DynamicToolSpec>>,
}
async fn send_message(
endpoint: &Endpoint,
config_overrides: &[String],
user_message: String,
) -> Result<()> {
let dynamic_tools = None;
send_message_v2_with_policies(
endpoint,
config_overrides,
user_message,
SendMessagePolicies {
command_name: "send-message",
experimental_api: false,
approval_policy: None,
sandbox_policy: None,
dynamic_tools: &dynamic_tools,
},
)
.await
}
pub async fn send_message_v2(
codex_bin: &Path,
config_overrides: &[String],
user_message: String,
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
let endpoint = Endpoint::SpawnCodex(codex_bin.to_path_buf());
send_message_v2_endpoint(
&endpoint,
config_overrides,
user_message,
/*experimental_api*/ true,
dynamic_tools,
)
.await
}
async fn send_message_v2_endpoint(
endpoint: &Endpoint,
config_overrides: &[String],
user_message: String,
experimental_api: bool,
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
if dynamic_tools.is_some() && !experimental_api {
bail!("--dynamic-tools requires --experimental-api for send-message-v2");
}
send_message_v2_with_policies(
endpoint,
config_overrides,
user_message,
SendMessagePolicies {
command_name: "send-message-v2",
experimental_api,
approval_policy: None,
sandbox_policy: None,
dynamic_tools,
},
)
.await
}
async fn trigger_zsh_fork_multi_cmd_approval(
endpoint: &Endpoint,
config_overrides: &[String],
user_message: Option<String>,
min_approvals: usize,
abort_on: Option<usize>,
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
if let Some(abort_on) = abort_on
&& abort_on == 0
{
bail!("--abort-on must be >= 1 when provided");
}
let default_prompt = "Run this exact command using shell command execution without rewriting or splitting it: /usr/bin/true && /usr/bin/true";
let message = user_message.unwrap_or_else(|| default_prompt.to_string());
with_client(
"trigger-zsh-fork-multi-cmd-approval",
endpoint,
config_overrides,
|client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let thread_response = client.thread_start(ThreadStartParams {
dynamic_tools: dynamic_tools.clone(),
..Default::default()
})?;
println!("< thread/start response: {thread_response:?}");
client.command_approval_behavior = match abort_on {
Some(index) => CommandApprovalBehavior::AbortOn(index),
None => CommandApprovalBehavior::AlwaysAccept,
};
client.command_approval_count = 0;
client.command_approval_item_ids.clear();
client.command_execution_statuses.clear();
client.last_turn_status = None;
let mut turn_params = TurnStartParams {
thread_id: thread_response.thread.id.clone(),
input: vec![V2UserInput::Text {
text: message,
text_elements: Vec::new(),
}],
..Default::default()
};
turn_params.approval_policy = Some(AskForApproval::OnRequest);
turn_params.sandbox_policy = Some(SandboxPolicy::ReadOnly {
access: ReadOnlyAccess::FullAccess,
network_access: false,
});
let turn_response = client.turn_start(turn_params)?;
println!("< turn/start response: {turn_response:?}");
client.stream_turn(&thread_response.thread.id, &turn_response.turn.id)?;
if client.command_approval_count < min_approvals {
bail!(
"expected at least {min_approvals} command approvals, got {}",
client.command_approval_count
);
}
let mut approvals_per_item = std::collections::BTreeMap::new();
for item_id in &client.command_approval_item_ids {
*approvals_per_item.entry(item_id.clone()).or_insert(0usize) += 1;
}
let max_approvals_for_one_item =
approvals_per_item.values().copied().max().unwrap_or(0);
if max_approvals_for_one_item < min_approvals {
bail!(
"expected at least {min_approvals} approvals for one command item, got max {max_approvals_for_one_item} with map {approvals_per_item:?}"
);
}
let last_command_status = client.command_execution_statuses.last();
if abort_on.is_none() {
if last_command_status != Some(&CommandExecutionStatus::Completed) {
bail!("expected completed command execution, got {last_command_status:?}");
}
if client.last_turn_status != Some(TurnStatus::Completed) {
bail!(
"expected completed turn in all-accept flow, got {:?}",
client.last_turn_status
);
}
} else if last_command_status == Some(&CommandExecutionStatus::Completed) {
bail!(
"expected non-completed command execution in mixed approval/decline flow, got {last_command_status:?}"
);
}
println!(
"[zsh-fork multi-approval summary] approvals={}, approvals_per_item={approvals_per_item:?}, command_statuses={:?}, turn_status={:?}",
client.command_approval_count,
client.command_execution_statuses,
client.last_turn_status
);
Ok(())
},
)
.await
}
async fn resume_message_v2(
endpoint: &Endpoint,
config_overrides: &[String],
thread_id: String,
user_message: String,
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
ensure_dynamic_tools_unused(dynamic_tools, "resume-message-v2")?;
with_client("resume-message-v2", endpoint, config_overrides, |client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let resume_response = client.thread_resume(ThreadResumeParams {
thread_id,
..Default::default()
})?;
println!("< thread/resume response: {resume_response:?}");
let turn_response = client.turn_start(TurnStartParams {
thread_id: resume_response.thread.id.clone(),
input: vec![V2UserInput::Text {
text: user_message,
text_elements: Vec::new(),
}],
..Default::default()
})?;
println!("< turn/start response: {turn_response:?}");
client.stream_turn(&resume_response.thread.id, &turn_response.turn.id)?;
Ok(())
})
.await
}
async fn thread_resume_follow(
endpoint: &Endpoint,
config_overrides: &[String],
thread_id: String,
) -> Result<()> {
with_client("thread-resume", endpoint, config_overrides, |client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let resume_response = client.thread_resume(ThreadResumeParams {
thread_id,
..Default::default()
})?;
println!("< thread/resume response: {resume_response:?}");
println!("< streaming notifications until process is terminated");
client.stream_notifications_forever()
})
.await
}
async fn watch(endpoint: &Endpoint, config_overrides: &[String]) -> Result<()> {
with_client("watch", endpoint, config_overrides, |client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
println!("< streaming inbound messages until process is terminated");
client.stream_notifications_forever()
})
.await
}
async fn trigger_cmd_approval(
endpoint: &Endpoint,
config_overrides: &[String],
user_message: Option<String>,
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
let default_prompt =
"Run `touch /tmp/should-trigger-approval` so I can confirm the file exists.";
let message = user_message.unwrap_or_else(|| default_prompt.to_string());
send_message_v2_with_policies(
endpoint,
config_overrides,
message,
SendMessagePolicies {
command_name: "trigger-cmd-approval",
experimental_api: true,
approval_policy: Some(AskForApproval::OnRequest),
sandbox_policy: Some(SandboxPolicy::ReadOnly {
access: ReadOnlyAccess::FullAccess,
network_access: false,
}),
dynamic_tools,
},
)
.await
}
async fn trigger_patch_approval(
endpoint: &Endpoint,
config_overrides: &[String],
user_message: Option<String>,
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
let default_prompt =
"Create a file named APPROVAL_DEMO.txt containing a short hello message using apply_patch.";
let message = user_message.unwrap_or_else(|| default_prompt.to_string());
send_message_v2_with_policies(
endpoint,
config_overrides,
message,
SendMessagePolicies {
command_name: "trigger-patch-approval",
experimental_api: true,
approval_policy: Some(AskForApproval::OnRequest),
sandbox_policy: Some(SandboxPolicy::ReadOnly {
access: ReadOnlyAccess::FullAccess,
network_access: false,
}),
dynamic_tools,
},
)
.await
}
async fn no_trigger_cmd_approval(
endpoint: &Endpoint,
config_overrides: &[String],
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
let prompt = "Run `touch should_not_trigger_approval.txt`";
send_message_v2_with_policies(
endpoint,
config_overrides,
prompt.to_string(),
SendMessagePolicies {
command_name: "no-trigger-cmd-approval",
experimental_api: true,
approval_policy: None,
sandbox_policy: None,
dynamic_tools,
},
)
.await
}
async fn send_message_v2_with_policies(
endpoint: &Endpoint,
config_overrides: &[String],
user_message: String,
policies: SendMessagePolicies<'_>,
) -> Result<()> {
with_client(
policies.command_name,
endpoint,
config_overrides,
|client| {
let initialize = client.initialize_with_experimental_api(policies.experimental_api)?;
println!("< initialize response: {initialize:?}");
let thread_response = client.thread_start(ThreadStartParams {
dynamic_tools: policies.dynamic_tools.clone(),
..Default::default()
})?;
println!("< thread/start response: {thread_response:?}");
let mut turn_params = TurnStartParams {
thread_id: thread_response.thread.id.clone(),
input: vec![V2UserInput::Text {
text: user_message,
// Test client sends plain text without UI element ranges.
text_elements: Vec::new(),
}],
..Default::default()
};
turn_params.approval_policy = policies.approval_policy;
turn_params.sandbox_policy = policies.sandbox_policy;
let turn_response = client.turn_start(turn_params)?;
println!("< turn/start response: {turn_response:?}");
client.stream_turn(&thread_response.thread.id, &turn_response.turn.id)?;
Ok(())
},
)
.await
}
async fn send_follow_up_v2(
endpoint: &Endpoint,
config_overrides: &[String],
first_message: String,
follow_up_message: String,
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
) -> Result<()> {
with_client("send-follow-up-v2", endpoint, config_overrides, |client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let thread_response = client.thread_start(ThreadStartParams {
dynamic_tools: dynamic_tools.clone(),
..Default::default()
})?;
println!("< thread/start response: {thread_response:?}");
let first_turn_params = TurnStartParams {
thread_id: thread_response.thread.id.clone(),
input: vec![V2UserInput::Text {
text: first_message,
// Test client sends plain text without UI element ranges.
text_elements: Vec::new(),
}],
..Default::default()
};
let first_turn_response = client.turn_start(first_turn_params)?;
println!("< turn/start response (initial): {first_turn_response:?}");
client.stream_turn(&thread_response.thread.id, &first_turn_response.turn.id)?;
let follow_up_params = TurnStartParams {
thread_id: thread_response.thread.id.clone(),
input: vec![V2UserInput::Text {
text: follow_up_message,
// Test client sends plain text without UI element ranges.
text_elements: Vec::new(),
}],
..Default::default()
};
let follow_up_response = client.turn_start(follow_up_params)?;
println!("< turn/start response (follow-up): {follow_up_response:?}");
client.stream_turn(&thread_response.thread.id, &follow_up_response.turn.id)?;
Ok(())
})
.await
}
async fn test_login(endpoint: &Endpoint, config_overrides: &[String]) -> Result<()> {
with_client("test-login", endpoint, config_overrides, |client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let login_response = client.login_account_chatgpt()?;
println!("< account/login/start response: {login_response:?}");
let LoginAccountResponse::Chatgpt { login_id, auth_url } = login_response else {
bail!("expected chatgpt login response");
};
println!("Open the following URL in your browser to continue:\n{auth_url}");
let completion = client.wait_for_account_login_completion(&login_id)?;
println!("< account/login/completed notification: {completion:?}");
if completion.success {
println!("Login succeeded.");
Ok(())
} else {
bail!(
"login failed: {}",
completion
.error
.as_deref()
.unwrap_or("unknown error from account/login/completed")
);
}
})
.await
}
async fn get_account_rate_limits(endpoint: &Endpoint, config_overrides: &[String]) -> Result<()> {
with_client(
"get-account-rate-limits",
endpoint,
config_overrides,
|client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let response = client.get_account_rate_limits()?;
println!("< account/rateLimits/read response: {response:?}");
Ok(())
},
)
.await
}
async fn model_list(endpoint: &Endpoint, config_overrides: &[String]) -> Result<()> {
with_client("model-list", endpoint, config_overrides, |client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let response = client.model_list(ModelListParams::default())?;
println!("< model/list response: {response:?}");
Ok(())
})
.await
}
async fn thread_list(endpoint: &Endpoint, config_overrides: &[String], limit: u32) -> Result<()> {
with_client("thread-list", endpoint, config_overrides, |client| {
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let response = client.thread_list(ThreadListParams {
cursor: None,
limit: Some(limit),
sort_key: None,
model_providers: None,
source_kinds: None,
archived: None,
cwd: None,
search_term: None,
})?;
println!("< thread/list response: {response:?}");
Ok(())
})
.await
}
async fn with_client<T>(
command_name: &'static str,
endpoint: &Endpoint,
config_overrides: &[String],
f: impl FnOnce(&mut CodexClient) -> Result<T>,
) -> Result<T> {
let tracing = TestClientTracing::initialize(config_overrides).await?;
let command_span = info_span!(
"app_server_test_client.command",
otel.kind = "client",
otel.name = command_name,
app_server_test_client.command = command_name,
);
let trace_summary = command_span.in_scope(|| TraceSummary::capture(tracing.traces_enabled));
let result = command_span.in_scope(|| {
let mut client = CodexClient::connect(endpoint, config_overrides)?;
f(&mut client)
});
print_trace_summary(&trace_summary);
result
}
fn thread_increment_elicitation(url: &str, thread_id: String) -> Result<()> {
let endpoint = Endpoint::ConnectWs(url.to_string());
let mut client = CodexClient::connect(&endpoint, &[])?;
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let response =
client.thread_increment_elicitation(ThreadIncrementElicitationParams { thread_id })?;
println!("< thread/increment_elicitation response: {response:?}");
Ok(())
}
fn thread_decrement_elicitation(url: &str, thread_id: String) -> Result<()> {
let endpoint = Endpoint::ConnectWs(url.to_string());
let mut client = CodexClient::connect(&endpoint, &[])?;
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let response =
client.thread_decrement_elicitation(ThreadDecrementElicitationParams { thread_id })?;
println!("< thread/decrement_elicitation response: {response:?}");
Ok(())
}
fn live_elicitation_timeout_pause(
codex_bin: Option<PathBuf>,
url: Option<String>,
config_overrides: &[String],
model: String,
workspace: PathBuf,
script: Option<PathBuf>,
hold_seconds: u64,
) -> Result<()> {
if cfg!(windows) {
bail!("live-elicitation-timeout-pause currently requires a POSIX shell");
}
if hold_seconds <= 10 {
bail!("--hold-seconds must be greater than 10 to exceed the unified exec timeout");
}
let mut _background_server = None;
let websocket_url = match (codex_bin, url) {
(Some(_), Some(_)) => bail!("--codex-bin and --url are mutually exclusive"),
(Some(codex_bin), None) => {
let server = BackgroundAppServer::spawn(&codex_bin, config_overrides)?;
let websocket_url = server.url.clone();
_background_server = Some(server);
websocket_url
}
(None, Some(url)) => url,
(None, None) => "ws://127.0.0.1:4222".to_string(),
};
let script_path = script.unwrap_or_else(|| {
PathBuf::from(env!("CARGO_MANIFEST_DIR"))
.join("scripts")
.join("live_elicitation_hold.sh")
});
if !script_path.is_file() {
bail!("helper script not found: {}", script_path.display());
}
let workspace = workspace
.canonicalize()
.with_context(|| format!("failed to resolve workspace `{}`", workspace.display()))?;
let app_server_test_client_bin = std::env::current_exe()
.context("failed to resolve codex-app-server-test-client binary path")?;
let endpoint = Endpoint::ConnectWs(websocket_url.clone());
let mut client = CodexClient::connect(&endpoint, &[])?;
let initialize = client.initialize()?;
println!("< initialize response: {initialize:?}");
let thread_response = client.thread_start(ThreadStartParams {
model: Some(model),
..Default::default()
})?;
println!("< thread/start response: {thread_response:?}");
let thread_id = thread_response.thread.id;
let command = format!(
"APP_SERVER_URL={} APP_SERVER_TEST_CLIENT_BIN={} ELICITATION_HOLD_SECONDS={} sh {}",
shell_quote(&websocket_url),
shell_quote(&app_server_test_client_bin.display().to_string()),
hold_seconds,
shell_quote(&script_path.display().to_string()),
);
let prompt = format!(
"Use the `exec_command` tool exactly once. Set its `cmd` field to the exact shell command below. Do not rewrite it, do not split it, do not call any other tool, do not set `yield_time_ms`, and wait for the command to finish before replying.\n\n{command}\n\nAfter the command finishes, reply with exactly `DONE`."
);
let started_at = Instant::now();
let turn_response = client.turn_start(TurnStartParams {
thread_id: thread_id.clone(),
input: vec![V2UserInput::Text {
text: prompt,
text_elements: Vec::new(),
}],
approval_policy: Some(AskForApproval::Never),
sandbox_policy: Some(SandboxPolicy::DangerFullAccess),
effort: Some(ReasoningEffort::High),
cwd: Some(workspace),
..Default::default()
})?;
println!("< turn/start response: {turn_response:?}");
let stream_result = client.stream_turn(&thread_id, &turn_response.turn.id);
let elapsed = started_at.elapsed();
let validation_result = (|| -> Result<()> {
stream_result?;
let helper_output = client
.command_execution_outputs
.iter()
.find(|output| output.contains("[elicitation-hold]"))
.cloned()
.ok_or_else(|| anyhow::anyhow!("expected helper script markers in command output"))?;
let minimum_elapsed = Duration::from_secs(hold_seconds.saturating_sub(1));
if client.last_turn_status != Some(TurnStatus::Completed) {
bail!(
"expected completed turn, got {:?} (last error: {:?})",
client.last_turn_status,
client.last_turn_error_message
);
}
if !client
.command_execution_statuses
.contains(&CommandExecutionStatus::Completed)
{
bail!(
"expected a completed command execution, got {:?}",
client.command_execution_statuses
);
}
if !client.helper_done_seen || !helper_output.contains("[elicitation-hold] done") {
bail!(
"expected helper script completion marker in command output, got: {helper_output:?}"
);
}
if !client.unexpected_items_before_helper_done.is_empty() {
bail!(
"turn started new items before helper completion: {:?}",
client.unexpected_items_before_helper_done
);
}
if client.turn_completed_before_helper_done {
bail!("turn completed before helper script finished");
}
if elapsed < minimum_elapsed {
bail!(
"turn completed too quickly to prove timeout pause worked: elapsed={elapsed:?}, expected at least {minimum_elapsed:?}"
);
}
Ok(())
})();
match client.thread_decrement_elicitation(ThreadDecrementElicitationParams {
thread_id: thread_id.clone(),
}) {
Ok(response) => {
println!("[cleanup] thread/decrement_elicitation response after harness: {response:?}");
}
Err(err) => {
eprintln!("[cleanup] thread/decrement_elicitation ignored: {err:#}");
}
}
validation_result?;
println!(
"[live elicitation timeout pause summary] thread_id={thread_id}, turn_id={}, elapsed={elapsed:?}, command_statuses={:?}",
turn_response.turn.id, client.command_execution_statuses
);
Ok(())
}
fn ensure_dynamic_tools_unused(
dynamic_tools: &Option<Vec<DynamicToolSpec>>,
command: &str,
) -> Result<()> {
if dynamic_tools.is_some() {
bail!(
"dynamic tools are only supported for v2 thread/start; remove --dynamic-tools for {command} or use send-message-v2"
);
}
Ok(())
}
fn parse_dynamic_tools_arg(dynamic_tools: &Option<String>) -> Result<Option<Vec<DynamicToolSpec>>> {
let Some(raw_arg) = dynamic_tools.as_deref() else {
return Ok(None);
};
let raw_json = if let Some(path) = raw_arg.strip_prefix('@') {
fs::read_to_string(Path::new(path))
.with_context(|| format!("read dynamic tools file {path}"))?
} else {
raw_arg.to_string()
};
let value: Value = serde_json::from_str(&raw_json).context("parse dynamic tools JSON")?;
let tools = match value {
Value::Array(_) => serde_json::from_value(value).context("decode dynamic tools array")?,
Value::Object(_) => vec![serde_json::from_value(value).context("decode dynamic tool")?],
_ => bail!("dynamic tools JSON must be an object or array"),
};
Ok(Some(tools))
}
enum ClientTransport {
Stdio {
child: Child,
stdin: Option<ChildStdin>,
stdout: BufReader<ChildStdout>,
},
WebSocket {
url: String,
socket: Box<WebSocket<MaybeTlsStream<TcpStream>>>,
},
}
struct CodexClient {
transport: ClientTransport,
pending_notifications: VecDeque<JSONRPCNotification>,
command_approval_behavior: CommandApprovalBehavior,
command_approval_count: usize,
command_approval_item_ids: Vec<String>,
command_execution_statuses: Vec<CommandExecutionStatus>,
command_execution_outputs: Vec<String>,
command_output_stream: String,
command_item_started: bool,
helper_done_seen: bool,
turn_completed_before_helper_done: bool,
unexpected_items_before_helper_done: Vec<ThreadItem>,
last_turn_status: Option<TurnStatus>,
last_turn_error_message: Option<String>,
}
#[derive(Debug, Clone, Copy)]
enum CommandApprovalBehavior {
AlwaysAccept,
AbortOn(usize),
}
fn item_started_before_helper_done_is_unexpected(
item: &ThreadItem,
command_item_started: bool,
helper_done_seen: bool,
) -> bool {
if !command_item_started || helper_done_seen {
return false;
}
!matches!(item, ThreadItem::UserMessage { .. })
}
impl CodexClient {
fn connect(endpoint: &Endpoint, config_overrides: &[String]) -> Result<Self> {
match endpoint {
Endpoint::SpawnCodex(codex_bin) => Self::spawn_stdio(codex_bin, config_overrides),
Endpoint::ConnectWs(url) => Self::connect_websocket(url),
}
}
fn spawn_stdio(codex_bin: &Path, config_overrides: &[String]) -> Result<Self> {
let codex_bin_display = codex_bin.display();
let mut cmd = Command::new(codex_bin);
if let Some(codex_bin_parent) = codex_bin.parent() {
let mut path = OsString::from(codex_bin_parent.as_os_str());
if let Some(existing_path) = std::env::var_os("PATH") {
path.push(":");
path.push(existing_path);
}
cmd.env("PATH", path);
}
for override_kv in config_overrides {
cmd.arg("--config").arg(override_kv);
}
let mut codex_app_server = cmd
.arg("app-server")
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::inherit())
.spawn()
.with_context(|| format!("failed to start `{codex_bin_display}` app-server"))?;
let stdin = codex_app_server
.stdin
.take()
.context("codex app-server stdin unavailable")?;
let stdout = codex_app_server
.stdout
.take()
.context("codex app-server stdout unavailable")?;
Ok(Self {
transport: ClientTransport::Stdio {
child: codex_app_server,
stdin: Some(stdin),
stdout: BufReader::new(stdout),
},
pending_notifications: VecDeque::new(),
command_approval_behavior: CommandApprovalBehavior::AlwaysAccept,
command_approval_count: 0,
command_approval_item_ids: Vec::new(),
command_execution_statuses: Vec::new(),
command_execution_outputs: Vec::new(),
command_output_stream: String::new(),
command_item_started: false,
helper_done_seen: false,
turn_completed_before_helper_done: false,
unexpected_items_before_helper_done: Vec::new(),
last_turn_status: None,
last_turn_error_message: None,
})
}
fn connect_websocket(url: &str) -> Result<Self> {
let parsed = Url::parse(url).with_context(|| format!("invalid websocket URL `{url}`"))?;
let deadline = Instant::now() + Duration::from_secs(10);
let (socket, _response) = loop {
match connect(parsed.as_str()) {
Ok(result) => break result,
Err(err) => {
if Instant::now() >= deadline {
return Err(err).with_context(|| {
format!(
"failed to connect to websocket app-server at `{url}`; if no server is running, start one with `codex-app-server-test-client serve --listen {url}`"
)
});
}
thread::sleep(Duration::from_millis(50));
}
}
};
Ok(Self {
transport: ClientTransport::WebSocket {
url: url.to_string(),
socket: Box::new(socket),
},
pending_notifications: VecDeque::new(),
command_approval_behavior: CommandApprovalBehavior::AlwaysAccept,
command_approval_count: 0,
command_approval_item_ids: Vec::new(),
command_execution_statuses: Vec::new(),
command_execution_outputs: Vec::new(),
command_output_stream: String::new(),
command_item_started: false,
helper_done_seen: false,
turn_completed_before_helper_done: false,
unexpected_items_before_helper_done: Vec::new(),
last_turn_status: None,
last_turn_error_message: None,
})
}
fn note_helper_output(&mut self, output: &str) {
self.command_output_stream.push_str(output);
if self
.command_output_stream
.contains("[elicitation-hold] done")
{
self.helper_done_seen = true;
}
}
fn initialize(&mut self) -> Result<InitializeResponse> {
self.initialize_with_experimental_api(/*experimental_api*/ true)
}
fn initialize_with_experimental_api(
&mut self,
experimental_api: bool,
) -> Result<InitializeResponse> {
let request_id = self.request_id();
let request = ClientRequest::Initialize {
request_id: request_id.clone(),
params: InitializeParams {
client_info: ClientInfo {
name: "codex-toy-app-server".to_string(),
title: Some("Codex Toy App Server".to_string()),
version: env!("CARGO_PKG_VERSION").to_string(),
},
capabilities: Some(InitializeCapabilities {
experimental_api,
opt_out_notification_methods: Some(
NOTIFICATIONS_TO_OPT_OUT
.iter()
.map(|method| (*method).to_string())
.collect(),
),
}),
},
};
let response: InitializeResponse = self.send_request(request, request_id, "initialize")?;
// Complete the initialize handshake.
let initialized = JSONRPCMessage::Notification(JSONRPCNotification {
method: "initialized".to_string(),
params: None,
});
self.write_jsonrpc_message(initialized)?;
Ok(response)
}
fn thread_start(&mut self, params: ThreadStartParams) -> Result<ThreadStartResponse> {
let request_id = self.request_id();
let request = ClientRequest::ThreadStart {
request_id: request_id.clone(),
params,
};
self.send_request(request, request_id, "thread/start")
}
fn thread_resume(&mut self, params: ThreadResumeParams) -> Result<ThreadResumeResponse> {
let request_id = self.request_id();
let request = ClientRequest::ThreadResume {
request_id: request_id.clone(),
params,
};
self.send_request(request, request_id, "thread/resume")
}
fn turn_start(&mut self, params: TurnStartParams) -> Result<TurnStartResponse> {
let request_id = self.request_id();
let request = ClientRequest::TurnStart {
request_id: request_id.clone(),
params,
};
self.send_request(request, request_id, "turn/start")
}
fn login_account_chatgpt(&mut self) -> Result<LoginAccountResponse> {
let request_id = self.request_id();
let request = ClientRequest::LoginAccount {
request_id: request_id.clone(),
params: codex_app_server_protocol::LoginAccountParams::Chatgpt,
};
self.send_request(request, request_id, "account/login/start")
}
fn get_account_rate_limits(&mut self) -> Result<GetAccountRateLimitsResponse> {
let request_id = self.request_id();
let request = ClientRequest::GetAccountRateLimits {
request_id: request_id.clone(),
params: None,
};
self.send_request(request, request_id, "account/rateLimits/read")
}
fn model_list(&mut self, params: ModelListParams) -> Result<ModelListResponse> {
let request_id = self.request_id();
let request = ClientRequest::ModelList {
request_id: request_id.clone(),
params,
};
self.send_request(request, request_id, "model/list")
}
fn thread_list(&mut self, params: ThreadListParams) -> Result<ThreadListResponse> {
let request_id = self.request_id();
let request = ClientRequest::ThreadList {
request_id: request_id.clone(),
params,
};
self.send_request(request, request_id, "thread/list")
}
fn thread_increment_elicitation(
&mut self,
params: ThreadIncrementElicitationParams,
) -> Result<ThreadIncrementElicitationResponse> {
let request_id = self.request_id();
let request = ClientRequest::ThreadIncrementElicitation {
request_id: request_id.clone(),
params,
};
self.send_request(request, request_id, "thread/increment_elicitation")
}
fn thread_decrement_elicitation(
&mut self,
params: ThreadDecrementElicitationParams,
) -> Result<ThreadDecrementElicitationResponse> {
let request_id = self.request_id();
let request = ClientRequest::ThreadDecrementElicitation {
request_id: request_id.clone(),
params,
};
self.send_request(request, request_id, "thread/decrement_elicitation")
}
fn wait_for_account_login_completion(
&mut self,
expected_login_id: &str,
) -> Result<AccountLoginCompletedNotification> {
loop {
let notification = self.next_notification()?;
if let Ok(server_notification) = ServerNotification::try_from(notification) {
match server_notification {
ServerNotification::AccountLoginCompleted(completion) => {
if completion.login_id.as_deref() == Some(expected_login_id) {
return Ok(completion);
}
println!(
"[ignoring account/login/completed for unexpected login_id: {:?}]",
completion.login_id
);
}
ServerNotification::AccountRateLimitsUpdated(snapshot) => {
println!("< accountRateLimitsUpdated notification: {snapshot:?}");
}
_ => {}
}
}
}
}
fn stream_turn(&mut self, thread_id: &str, turn_id: &str) -> Result<()> {
loop {
let notification = self.next_notification()?;
let Ok(server_notification) = ServerNotification::try_from(notification) else {
continue;
};
match server_notification {
ServerNotification::ThreadStarted(payload) => {
if payload.thread.id == thread_id {
println!("< thread/started notification: {:?}", payload.thread);
}
}
ServerNotification::TurnStarted(payload) => {
if payload.turn.id == turn_id {
println!("< turn/started notification: {:?}", payload.turn.status);
}
}
ServerNotification::AgentMessageDelta(delta) => {
print!("{}", delta.delta);
std::io::stdout().flush().ok();
}
ServerNotification::CommandExecutionOutputDelta(delta) => {
self.note_helper_output(&delta.delta);
print!("{}", delta.delta);
std::io::stdout().flush().ok();
}
ServerNotification::TerminalInteraction(delta) => {
println!("[stdin sent: {}]", delta.stdin);
std::io::stdout().flush().ok();
}
ServerNotification::ItemStarted(payload) => {
if matches!(payload.item, ThreadItem::CommandExecution { .. }) {
if self.command_item_started && !self.helper_done_seen {
self.unexpected_items_before_helper_done
.push(payload.item.clone());
}
self.command_item_started = true;
} else if item_started_before_helper_done_is_unexpected(
&payload.item,
self.command_item_started,
self.helper_done_seen,
) {
self.unexpected_items_before_helper_done
.push(payload.item.clone());
}
println!("\n< item started: {:?}", payload.item);
}
ServerNotification::ItemCompleted(payload) => {
if let ThreadItem::CommandExecution {
status,
aggregated_output,
..
} = payload.item.clone()
{
self.command_execution_statuses.push(status);
if let Some(aggregated_output) = aggregated_output {
self.note_helper_output(&aggregated_output);
self.command_execution_outputs.push(aggregated_output);
}
}
println!("< item completed: {:?}", payload.item);
}
ServerNotification::TurnCompleted(payload) => {
if payload.turn.id == turn_id {
self.last_turn_status = Some(payload.turn.status.clone());
if self.command_item_started && !self.helper_done_seen {
self.turn_completed_before_helper_done = true;
}
self.last_turn_error_message = payload
.turn
.error
.as_ref()
.map(|error| error.message.clone());
println!("\n< turn/completed notification: {:?}", payload.turn.status);
if payload.turn.status == TurnStatus::Failed
&& let Some(error) = payload.turn.error
{
println!("[turn error] {}", error.message);
}
break;
}
}
ServerNotification::McpToolCallProgress(payload) => {
println!("< MCP tool progress: {}", payload.message);
}
_ => {
println!("[UNKNOWN SERVER NOTIFICATION] {server_notification:?}");
}
}
}
Ok(())
}
fn stream_notifications_forever(&mut self) -> Result<()> {
loop {
let _ = self.next_notification()?;
}
}
fn send_request<T>(
&mut self,
request: ClientRequest,
request_id: RequestId,
method: &str,
) -> Result<T>
where
T: DeserializeOwned,
{
let request_span = info_span!(
"app_server_test_client.request",
otel.kind = "client",
otel.name = method,
rpc.system = "jsonrpc",
rpc.method = method,
rpc.request_id = ?request_id,
);
request_span.in_scope(|| {
self.write_request(&request)?;
self.wait_for_response(request_id, method)
})
}
fn write_request(&mut self, request: &ClientRequest) -> Result<()> {
let request_value = serde_json::to_value(request)?;
let mut request: JSONRPCRequest = serde_json::from_value(request_value)
.context("client request was not a valid JSON-RPC request")?;
request.trace = current_span_w3c_trace_context();
let request_json = serde_json::to_string(&request)?;
let request_pretty = serde_json::to_string_pretty(&request)?;
print_multiline_with_prefix("> ", &request_pretty);
self.write_payload(&request_json)
}
fn wait_for_response<T>(&mut self, request_id: RequestId, method: &str) -> Result<T>
where
T: DeserializeOwned,
{
loop {
let message = self.read_jsonrpc_message()?;
match message {
JSONRPCMessage::Response(JSONRPCResponse { id, result }) => {
if id == request_id {
return serde_json::from_value(result)
.with_context(|| format!("{method} response missing payload"));
}
}
JSONRPCMessage::Error(err) => {
if err.id == request_id {
bail!("{method} failed: {err:?}");
}
}
JSONRPCMessage::Notification(notification) => {
self.pending_notifications.push_back(notification);
}
JSONRPCMessage::Request(request) => {
self.handle_server_request(request)?;
}
}
}
}
fn next_notification(&mut self) -> Result<JSONRPCNotification> {
if let Some(notification) = self.pending_notifications.pop_front() {
return Ok(notification);
}
loop {
let message = self.read_jsonrpc_message()?;
match message {
JSONRPCMessage::Notification(notification) => return Ok(notification),
JSONRPCMessage::Response(_) | JSONRPCMessage::Error(_) => {
// No outstanding requests, so ignore stray responses/errors for now.
continue;
}
JSONRPCMessage::Request(request) => {
self.handle_server_request(request)?;
}
}
}
}
fn read_jsonrpc_message(&mut self) -> Result<JSONRPCMessage> {
loop {
let raw = self.read_payload()?;
let trimmed = raw.trim();
if trimmed.is_empty() {
continue;
}
let parsed: Value =
serde_json::from_str(trimmed).context("response was not valid JSON-RPC")?;
let pretty = serde_json::to_string_pretty(&parsed)?;
print_multiline_with_prefix("< ", &pretty);
let message: JSONRPCMessage = serde_json::from_value(parsed)
.context("response was not a valid JSON-RPC message")?;
return Ok(message);
}
}
fn request_id(&self) -> RequestId {
RequestId::String(Uuid::new_v4().to_string())
}
fn handle_server_request(&mut self, request: JSONRPCRequest) -> Result<()> {
let server_request = ServerRequest::try_from(request)
.context("failed to deserialize ServerRequest from JSONRPCRequest")?;
match server_request {
ServerRequest::CommandExecutionRequestApproval { request_id, params } => {
self.handle_command_execution_request_approval(request_id, params)?;
}
ServerRequest::FileChangeRequestApproval { request_id, params } => {
self.approve_file_change_request(request_id, params)?;
}
other => {
bail!("received unsupported server request: {other:?}");
}
}
Ok(())
}
fn handle_command_execution_request_approval(
&mut self,
request_id: RequestId,
params: CommandExecutionRequestApprovalParams,
) -> Result<()> {
let CommandExecutionRequestApprovalParams {
thread_id,
turn_id,
item_id,
approval_id,
reason,
network_approval_context,
command,
cwd,
command_actions,
additional_permissions,
skill_metadata,
proposed_execpolicy_amendment,
proposed_network_policy_amendments,
available_decisions,
} = params;
println!(
"\n< commandExecution approval requested for thread {thread_id}, turn {turn_id}, item {item_id}, approval {}",
approval_id.as_deref().unwrap_or("<none>")
);
self.command_approval_count += 1;
self.command_approval_item_ids.push(item_id.clone());
if let Some(reason) = reason.as_deref() {
println!("< reason: {reason}");
}
if let Some(network_approval_context) = network_approval_context.as_ref() {
println!("< network approval context: {network_approval_context:?}");
}
if let Some(available_decisions) = available_decisions.as_ref() {
println!("< available decisions: {available_decisions:?}");
}
if let Some(command) = command.as_deref() {
println!("< command: {command}");
}
if let Some(cwd) = cwd.as_ref() {
println!("< cwd: {}", cwd.display());
}
if let Some(command_actions) = command_actions.as_ref()
&& !command_actions.is_empty()
{
println!("< command actions: {command_actions:?}");
}
if let Some(additional_permissions) = additional_permissions.as_ref() {
println!("< additional permissions: {additional_permissions:?}");
}
if let Some(skill_metadata) = skill_metadata.as_ref() {
println!("< skill metadata: {skill_metadata:?}");
}
if let Some(execpolicy_amendment) = proposed_execpolicy_amendment.as_ref() {
println!("< proposed execpolicy amendment: {execpolicy_amendment:?}");
}
if let Some(network_policy_amendments) = proposed_network_policy_amendments.as_ref() {
println!("< proposed network policy amendments: {network_policy_amendments:?}");
}
let decision = match self.command_approval_behavior {
CommandApprovalBehavior::AlwaysAccept => CommandExecutionApprovalDecision::Accept,
CommandApprovalBehavior::AbortOn(index) if self.command_approval_count == index => {
CommandExecutionApprovalDecision::Cancel
}
CommandApprovalBehavior::AbortOn(_) => CommandExecutionApprovalDecision::Accept,
};
let response = CommandExecutionRequestApprovalResponse {
decision: decision.clone(),
};
self.send_server_request_response(request_id, &response)?;
println!(
"< commandExecution decision for approval #{} on item {item_id}: {:?}",
self.command_approval_count, decision
);
Ok(())
}
fn approve_file_change_request(
&mut self,
request_id: RequestId,
params: FileChangeRequestApprovalParams,
) -> Result<()> {
let FileChangeRequestApprovalParams {
thread_id,
turn_id,
item_id,
reason,
grant_root,
} = params;
println!(
"\n< fileChange approval requested for thread {thread_id}, turn {turn_id}, item {item_id}"
);
if let Some(reason) = reason.as_deref() {
println!("< reason: {reason}");
}
if let Some(grant_root) = grant_root.as_deref() {
println!("< grant root: {}", grant_root.display());
}
let response = FileChangeRequestApprovalResponse {
decision: FileChangeApprovalDecision::Accept,
};
self.send_server_request_response(request_id, &response)?;
println!("< approved fileChange request for item {item_id}");
Ok(())
}
fn send_server_request_response<T>(&mut self, request_id: RequestId, response: &T) -> Result<()>
where
T: Serialize,
{
let message = JSONRPCMessage::Response(JSONRPCResponse {
id: request_id,
result: serde_json::to_value(response)?,
});
self.write_jsonrpc_message(message)
}
fn write_jsonrpc_message(&mut self, message: JSONRPCMessage) -> Result<()> {
let payload = serde_json::to_string(&message)?;
let pretty = serde_json::to_string_pretty(&message)?;
print_multiline_with_prefix("> ", &pretty);
self.write_payload(&payload)
}
fn write_payload(&mut self, payload: &str) -> Result<()> {
match &mut self.transport {
ClientTransport::Stdio { stdin, .. } => {
if let Some(stdin) = stdin.as_mut() {
writeln!(stdin, "{payload}")?;
stdin
.flush()
.context("failed to flush payload to codex app-server")?;
return Ok(());
}
bail!("codex app-server stdin closed")
}
ClientTransport::WebSocket { socket, url } => {
socket
.send(Message::Text(payload.to_string().into()))
.with_context(|| format!("failed to write websocket message to `{url}`"))?;
Ok(())
}
}
}
fn read_payload(&mut self) -> Result<String> {
match &mut self.transport {
ClientTransport::Stdio { stdout, .. } => {
let mut response_line = String::new();
let bytes = stdout
.read_line(&mut response_line)
.context("failed to read from codex app-server")?;
if bytes == 0 {
bail!("codex app-server closed stdout");
}
Ok(response_line)
}
ClientTransport::WebSocket { socket, url } => loop {
let frame = socket
.read()
.with_context(|| format!("failed to read websocket message from `{url}`"))?;
match frame {
Message::Text(text) => return Ok(text.to_string()),
Message::Binary(_) | Message::Ping(_) | Message::Pong(_) => continue,
Message::Close(_) => {
bail!("websocket app-server at `{url}` closed the connection")
}
Message::Frame(_) => continue,
}
},
}
}
}
fn print_multiline_with_prefix(prefix: &str, payload: &str) {
for line in payload.lines() {
println!("{prefix}{line}");
}
}
struct TestClientTracing {
_otel_provider: Option<OtelProvider>,
traces_enabled: bool,
}
impl TestClientTracing {
async fn initialize(config_overrides: &[String]) -> Result<Self> {
let cli_kv_overrides = CliConfigOverrides {
raw_overrides: config_overrides.to_vec(),
}
.parse_overrides()
.map_err(|e| anyhow::anyhow!("error parsing -c overrides: {e}"))?;
let config = Config::load_with_cli_overrides(cli_kv_overrides)
.await
.context("error loading config")?;
let otel_provider = codex_core::otel_init::build_provider(
&config,
env!("CARGO_PKG_VERSION"),
Some(OTEL_SERVICE_NAME),
DEFAULT_ANALYTICS_ENABLED,
)
.map_err(|e| anyhow::anyhow!("error loading otel config: {e}"))?;
let traces_enabled = otel_provider
.as_ref()
.and_then(|provider| provider.tracer_provider.as_ref())
.is_some();
if let Some(provider) = otel_provider.as_ref()
&& traces_enabled
{
let _ = tracing_subscriber::registry()
.with(provider.tracing_layer())
.try_init();
}
Ok(Self {
traces_enabled,
_otel_provider: otel_provider,
})
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
enum TraceSummary {
Enabled { url: String },
Disabled,
}
impl TraceSummary {
fn capture(traces_enabled: bool) -> Self {
if !traces_enabled {
return Self::Disabled;
}
current_span_w3c_trace_context()
.as_ref()
.and_then(trace_url_from_context)
.map_or(Self::Disabled, |url| Self::Enabled { url })
}
}
fn trace_url_from_context(trace: &W3cTraceContext) -> Option<String> {
let traceparent = trace.traceparent.as_deref()?;
let mut parts = traceparent.split('-');
match (parts.next(), parts.next(), parts.next(), parts.next()) {
(Some(_version), Some(trace_id), Some(_span_id), Some(_trace_flags))
if trace_id.len() == 32 =>
{
Some(format!("go/trace/{trace_id}"))
}
_ => None,
}
}
fn print_trace_summary(trace_summary: &TraceSummary) {
println!("\n[Datadog trace]");
match trace_summary {
TraceSummary::Enabled { url } => println!("{url}\n"),
TraceSummary::Disabled => println!("{TRACE_DISABLED_MESSAGE}\n"),
}
}
impl Drop for CodexClient {
fn drop(&mut self) {
let ClientTransport::Stdio { child, stdin, .. } = &mut self.transport else {
return;
};
let _ = stdin.take();
if let Ok(Some(status)) = child.try_wait() {
println!("[codex app-server exited: {status}]");
return;
}
let deadline = SystemTime::now() + APP_SERVER_GRACEFUL_SHUTDOWN_TIMEOUT;
loop {
if let Ok(Some(status)) = child.try_wait() {
println!("[codex app-server exited: {status}]");
return;
}
if SystemTime::now() >= deadline {
break;
}
thread::sleep(APP_SERVER_GRACEFUL_SHUTDOWN_POLL_INTERVAL);
}
let _ = child.kill();
let _ = child.wait();
}
}