Stabilize plan item app-server tests (#14058)

## What changed
- run the two plan-mode app-server tests on a multi-thread Tokio runtime
instead of the default single-thread test runtime
- stop relying on wiremock teardown expectations for `/responses` and
explicitly wait for the expected request count after the turn completes

## Why this fixes the flake
- this failure was showing up on Windows ARM as a late wiremock panic
saying the mock server saw zero `/responses` calls, but the real issue
was that the test could stall around app-server startup and only fail
during teardown
- moving these tests to the same multi-thread runtime used by the other
collaboration-mode app-server tests removes that startup scheduling race
- asserting the `/responses` count directly makes the test
deterministic: we now wait for the real POST instead of depending on a
drop-time verification that can hide the underlying timing issue

## Scope
- test-only change; no production logic changes
This commit is contained in:
Ahmed Ibrahim 2026-03-09 10:24:18 -07:00 committed by GitHub
parent 5d9db0f995
commit 6b68d1ef66
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,7 +1,8 @@
use anyhow::Result;
use anyhow::anyhow;
use anyhow::bail;
use app_test_support::McpProcess;
use app_test_support::create_mock_responses_server_sequence;
use app_test_support::create_mock_responses_server_sequence_unchecked;
use app_test_support::to_response;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
@ -28,11 +29,13 @@ use pretty_assertions::assert_eq;
use std::collections::BTreeMap;
use std::path::Path;
use tempfile::TempDir;
use tokio::time::sleep;
use tokio::time::timeout;
use wiremock::MockServer;
const DEFAULT_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(10);
#[tokio::test]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn plan_mode_uses_proposed_plan_block_for_plan_item() -> Result<()> {
skip_if_no_network!(Ok(()));
@ -45,7 +48,7 @@ async fn plan_mode_uses_proposed_plan_block_for_plan_item() -> Result<()> {
responses::ev_assistant_message("msg-1", &full_message),
responses::ev_completed("resp-1"),
])];
let server = create_mock_responses_server_sequence(responses).await;
let server = create_mock_responses_server_sequence_unchecked(responses).await;
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
@ -56,6 +59,7 @@ async fn plan_mode_uses_proposed_plan_block_for_plan_item() -> Result<()> {
let turn = start_plan_mode_turn(&mut mcp).await?;
let (_, completed_items, plan_deltas, turn_completed) =
collect_turn_notifications(&mut mcp).await?;
wait_for_responses_request_count(&server, 1).await?;
assert_eq!(turn_completed.turn.id, turn.id);
assert_eq!(turn_completed.turn.status, TurnStatus::Completed);
@ -93,7 +97,7 @@ async fn plan_mode_uses_proposed_plan_block_for_plan_item() -> Result<()> {
Ok(())
}
#[tokio::test]
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
async fn plan_mode_without_proposed_plan_does_not_emit_plan_item() -> Result<()> {
skip_if_no_network!(Ok(()));
@ -102,7 +106,7 @@ async fn plan_mode_without_proposed_plan_does_not_emit_plan_item() -> Result<()>
responses::ev_assistant_message("msg-1", "Done"),
responses::ev_completed("resp-1"),
])];
let server = create_mock_responses_server_sequence(responses).await;
let server = create_mock_responses_server_sequence_unchecked(responses).await;
let codex_home = TempDir::new()?;
create_config_toml(codex_home.path(), &server.uri())?;
@ -112,6 +116,7 @@ async fn plan_mode_without_proposed_plan_does_not_emit_plan_item() -> Result<()>
let _turn = start_plan_mode_turn(&mut mcp).await?;
let (_, completed_items, plan_deltas, _) = collect_turn_notifications(&mut mcp).await?;
wait_for_responses_request_count(&server, 1).await?;
let has_plan_item = completed_items
.iter()
@ -214,6 +219,36 @@ async fn collect_turn_notifications(
}
}
async fn wait_for_responses_request_count(
server: &MockServer,
expected_count: usize,
) -> Result<()> {
timeout(DEFAULT_READ_TIMEOUT, async {
loop {
let Some(requests) = server.received_requests().await else {
bail!("wiremock did not record requests");
};
let responses_request_count = requests
.iter()
.filter(|request| {
request.method == "POST" && request.url.path().ends_with("/responses")
})
.count();
if responses_request_count == expected_count {
return Ok::<(), anyhow::Error>(());
}
if responses_request_count > expected_count {
bail!(
"expected exactly {expected_count} /responses requests, got {responses_request_count}"
);
}
sleep(std::time::Duration::from_millis(10)).await;
}
})
.await??;
Ok(())
}
fn create_config_toml(codex_home: &Path, server_uri: &str) -> std::io::Result<()> {
let features = BTreeMap::from([(Feature::CollaborationModes, true)]);
let feature_entries = features