From 1980b6ce00ff8a03fffb670e01077f5dafd91729 Mon Sep 17 00:00:00 2001 From: Max Johnson <162359438+maxj-oai@users.noreply.github.com> Date: Thu, 5 Mar 2026 10:16:58 -0800 Subject: [PATCH] treat SIGTERM like ctrl-c for graceful shutdown (#13594) treat SIGTERM the same as SIGINT for graceful app-server websocket shutdown --- codex-rs/app-server/src/lib.rs | 42 ++++++++---- .../v2/connection_handling_websocket_unix.rs | 65 ++++++++++++++++++- 2 files changed, 92 insertions(+), 15 deletions(-) diff --git a/codex-rs/app-server/src/lib.rs b/codex-rs/app-server/src/lib.rs index 580282d5d..e8dd1638e 100644 --- a/codex-rs/app-server/src/lib.rs +++ b/codex-rs/app-server/src/lib.rs @@ -124,6 +124,25 @@ enum ShutdownAction { Finish, } +async fn shutdown_signal() -> IoResult<()> { + #[cfg(unix)] + { + use tokio::signal::unix::SignalKind; + use tokio::signal::unix::signal; + + let mut term = signal(SignalKind::terminate())?; + tokio::select! { + ctrl_c_result = tokio::signal::ctrl_c() => ctrl_c_result, + _ = term.recv() => Ok(()), + } + } + + #[cfg(not(unix))] + { + tokio::signal::ctrl_c().await + } +} + impl ShutdownState { fn requested(&self) -> bool { self.requested @@ -133,7 +152,7 @@ impl ShutdownState { self.forced } - fn on_ctrl_c(&mut self, connection_count: usize, running_turn_count: usize) { + fn on_signal(&mut self, connection_count: usize, running_turn_count: usize) { if self.requested { self.forced = true; return; @@ -142,7 +161,7 @@ impl ShutdownState { self.requested = true; self.last_logged_running_turn_count = None; info!( - "received Ctrl-C; entering graceful restart drain (connections={}, runningAssistantTurns={}, requests still accepted until no assistant turns are running)", + "received shutdown signal; entering graceful restart drain (connections={}, runningAssistantTurns={}, requests still accepted until no assistant turns are running)", connection_count, running_turn_count, ); } @@ -155,11 +174,11 @@ impl ShutdownState { if self.forced || running_turn_count == 0 { if self.forced { info!( - "received second Ctrl-C; forcing restart with {running_turn_count} running assistant turn(s) and {connection_count} connection(s)" + "received second shutdown signal; forcing restart with {running_turn_count} running assistant turn(s) and {connection_count} connection(s)" ); } else { info!( - "Ctrl-C restart: no assistant turns running; stopping acceptor and disconnecting {connection_count} connection(s)" + "shutdown signal restart: no assistant turns running; stopping acceptor and disconnecting {connection_count} connection(s)" ); } return ShutdownAction::Finish; @@ -167,7 +186,7 @@ impl ShutdownState { if self.last_logged_running_turn_count != Some(running_turn_count) { info!( - "Ctrl-C restart: waiting for {running_turn_count} running assistant turn(s) to finish" + "shutdown signal restart: waiting for {running_turn_count} running assistant turn(s) to finish" ); self.last_logged_running_turn_count = Some(running_turn_count); } @@ -359,8 +378,7 @@ pub async fn run_main_with_transport( }; let single_client_mode = matches!(&transport_runtime, TransportRuntime::Stdio); let shutdown_when_no_connections = single_client_mode; - let graceful_ctrl_c_restart_enabled = !single_client_mode; - + let graceful_signal_restart_enabled = !single_client_mode; // Parse CLI overrides once and derive the base Config eagerly so later // components do not need to work with raw TOML values. let cli_kv_overrides = cli_config_overrides.parse_overrides().map_err(|e| { @@ -614,14 +632,14 @@ pub async fn run_main_with_transport( } tokio::select! { - ctrl_c_result = tokio::signal::ctrl_c(), if graceful_ctrl_c_restart_enabled && !shutdown_state.forced() => { - if let Err(err) = ctrl_c_result { - warn!("failed to listen for Ctrl-C during graceful restart drain: {err}"); + shutdown_signal_result = shutdown_signal(), if graceful_signal_restart_enabled && !shutdown_state.forced() => { + if let Err(err) = shutdown_signal_result { + warn!("failed to listen for shutdown signal during graceful restart drain: {err}"); } let running_turn_count = *running_turn_count_rx.borrow(); - shutdown_state.on_ctrl_c(connections.len(), running_turn_count); + shutdown_state.on_signal(connections.len(), running_turn_count); } - changed = running_turn_count_rx.changed(), if graceful_ctrl_c_restart_enabled && shutdown_state.requested() => { + changed = running_turn_count_rx.changed(), if graceful_signal_restart_enabled && shutdown_state.requested() => { if changed.is_err() { warn!("running-turn watcher closed during graceful restart drain"); } diff --git a/codex-rs/app-server/tests/suite/v2/connection_handling_websocket_unix.rs b/codex-rs/app-server/tests/suite/v2/connection_handling_websocket_unix.rs index c95f78f01..38bb4abbd 100644 --- a/codex-rs/app-server/tests/suite/v2/connection_handling_websocket_unix.rs +++ b/codex-rs/app-server/tests/suite/v2/connection_handling_websocket_unix.rs @@ -83,6 +83,57 @@ async fn websocket_transport_second_ctrl_c_forces_exit_while_turn_running() -> R Ok(()) } +#[tokio::test] +async fn websocket_transport_sigterm_waits_for_running_turn_before_exit() -> Result<()> { + let GracefulCtrlCFixture { + _codex_home, + _server, + mut process, + mut ws, + } = start_ctrl_c_restart_fixture(Duration::from_secs(3)).await?; + + send_sigterm(&process)?; + assert_process_does_not_exit_within(&mut process, Duration::from_millis(300)).await?; + + let status = wait_for_process_exit_within( + &mut process, + Duration::from_secs(10), + "timed out waiting for graceful SIGTERM restart shutdown", + ) + .await?; + assert!(status.success(), "expected graceful exit, got {status}"); + + expect_websocket_disconnect(&mut ws).await?; + + Ok(()) +} + +#[tokio::test] +async fn websocket_transport_second_sigterm_forces_exit_while_turn_running() -> Result<()> { + let GracefulCtrlCFixture { + _codex_home, + _server, + mut process, + mut ws, + } = start_ctrl_c_restart_fixture(Duration::from_secs(3)).await?; + + send_sigterm(&process)?; + assert_process_does_not_exit_within(&mut process, Duration::from_millis(300)).await?; + + send_sigterm(&process)?; + let status = wait_for_process_exit_within( + &mut process, + Duration::from_secs(2), + "timed out waiting for forced SIGTERM restart shutdown", + ) + .await?; + assert!(status.success(), "expected graceful exit, got {status}"); + + expect_websocket_disconnect(&mut ws).await?; + + Ok(()) +} + struct GracefulCtrlCFixture { _codex_home: TempDir, _server: wiremock::MockServer, @@ -180,16 +231,24 @@ async fn wait_for_responses_post(server: &wiremock::MockServer, wait_for: Durati } fn send_sigint(process: &Child) -> Result<()> { + send_signal(process, "-INT") +} + +fn send_sigterm(process: &Child) -> Result<()> { + send_signal(process, "-TERM") +} + +fn send_signal(process: &Child, signal: &str) -> Result<()> { let pid = process .id() .context("websocket app-server process has no pid")?; let status = StdCommand::new("kill") - .arg("-INT") + .arg(signal) .arg(pid.to_string()) .status() - .context("failed to invoke kill -INT")?; + .with_context(|| format!("failed to invoke kill {signal}"))?; if !status.success() { - bail!("kill -INT exited with {status}"); + bail!("kill {signal} exited with {status}"); } Ok(()) }