fix(app-server): fix approval events in review mode (#10416)

One of our partners flagged that they were seeing the wrong order of
events when running `review/start` with command exec approvals:
```
{"method":"item/commandExecution/requestApproval","id":0,"params":{"threadId":"019c0b6b-6a42-7c02-99c4-98c80e88ac27","turnId":"0","itemId":"0","reason":"`/bin/zsh -lc 'git show b7a92b4eacf262c575f26b1e1ed621a357642e55 --stat'` requires approval: Xcode-required approval: Require explicit user confirmation for all commands.","proposedExecpolicyAmendment":null}}

{"method":"item/started","params":{"item":{"type":"commandExecution","id":"call_AEjlbHqLYNM7kbU3N6uw1CNi","command":"/bin/zsh -lc 'git show b7a92b4eacf262c575f26b1e1ed621a357642e55 --stat'","cwd":"/Users/devingreen/Desktop/SampleProject","processId":null,"status":"inProgress","commandActions":[{"type":"unknown","command":"git show b7a92b4eacf262c575f26b1e1ed621a357642e55 --stat"}],"aggregatedOutput":null,"exitCode":null,"durationMs":null},"threadId":"019c0b6b-6a42-7c02-99c4-98c80e88ac27","turnId":"0"}}
```

**Key fix**: In the review sub‑agent delegate we were forwarding exec
(and patch) approvals using the parent turn id (`parent_ctx.sub_id`) as
the approval call_id. That made
`item/commandExecution/requestApproval.itemId` differ from the actual
`item/started` id. We now forward the sub‑agent’s `call_id` from the
approval event instead, so the approval item id matches the
commandExecution item id in review flows.

Here’s the expected event order for an inline `review/start` that
triggers an exec approval after this fix:
1. Response to review/start (JSON‑RPC response)
- Includes `turn` (status inProgress) and `review_thread_id` (same as
parent thread for inline).
2. `turn/started` notification
  - turnId is the review turn id (e.g., "0").
3. `item/started` → EnteredReviewMode
  - item.id == turnId, marks entry into review mode.
4. `item/started` → commandExecution
  - item.id == <call_id> (e.g., "review-call-1"), status: inProgress.
5. `item/commandExecution/requestApproval` request
  - JSON‑RPC request (not a notification).
  - params.itemId == <call_id> and params.turnId == turnId.
6. Client replies to approval request (Approved / Declined / etc).
7. If approved:
  - Optional `item/commandExecution/outputDelta` notifications.
  - `item/completed` → commandExecution with status and exitCode.
8. Review finishes:
  - `item/started` → ExitedReviewMode
  - `item/completed` → ExitedReviewMode
  - (Agent message items may also appear, depending on review output.)
9. `turn/completed` notification

The key being #4 and #5 are now in the proper order with the correct
item id.
This commit is contained in:
Owen Lin 2026-02-03 12:08:17 -08:00 committed by GitHub
parent efd96c46c7
commit d9ad5c3c49
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 119 additions and 13 deletions

View file

@ -1,6 +1,9 @@
use anyhow::Result;
use app_test_support::McpProcess;
use app_test_support::create_final_assistant_message_sse_response;
use app_test_support::create_mock_responses_server_repeating_assistant;
use app_test_support::create_mock_responses_server_sequence;
use app_test_support::create_shell_command_sse_response;
use app_test_support::to_response;
use codex_app_server_protocol::ItemCompletedNotification;
use codex_app_server_protocol::ItemStartedNotification;
@ -12,6 +15,7 @@ use codex_app_server_protocol::ReviewDelivery;
use codex_app_server_protocol::ReviewStartParams;
use codex_app_server_protocol::ReviewStartResponse;
use codex_app_server_protocol::ReviewTarget;
use codex_app_server_protocol::ServerRequest;
use codex_app_server_protocol::ThreadItem;
use codex_app_server_protocol::ThreadStartParams;
use codex_app_server_protocol::ThreadStartResponse;
@ -129,6 +133,91 @@ async fn review_start_runs_review_turn_and_emits_code_review_item() -> Result<()
Ok(())
}
#[tokio::test]
async fn review_start_exec_approval_item_id_matches_command_execution_item() -> Result<()> {
let responses = vec![
create_shell_command_sse_response(
vec![
"git".to_string(),
"rev-parse".to_string(),
"HEAD".to_string(),
],
None,
Some(5000),
"review-call-1",
)?,
create_final_assistant_message_sse_response("done")?,
];
let server = create_mock_responses_server_sequence(responses).await;
let codex_home = TempDir::new()?;
create_config_toml_with_approval_policy(codex_home.path(), &server.uri(), "untrusted")?;
let mut mcp = McpProcess::new(codex_home.path()).await?;
timeout(DEFAULT_READ_TIMEOUT, mcp.initialize()).await??;
let thread_id = start_default_thread(&mut mcp).await?;
let review_req = mcp
.send_review_start_request(ReviewStartParams {
thread_id,
delivery: Some(ReviewDelivery::Inline),
target: ReviewTarget::Commit {
sha: "1234567deadbeef".to_string(),
title: Some("Check review approvals".to_string()),
},
})
.await?;
let review_resp: JSONRPCResponse = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_response_message(RequestId::Integer(review_req)),
)
.await??;
let ReviewStartResponse { turn, .. } = to_response::<ReviewStartResponse>(review_resp)?;
let turn_id = turn.id.clone();
let server_req = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_request_message(),
)
.await??;
let ServerRequest::CommandExecutionRequestApproval { request_id, params } = server_req else {
panic!("expected CommandExecutionRequestApproval request");
};
assert_eq!(params.item_id, "review-call-1");
assert_eq!(params.turn_id, turn_id);
let mut command_item_id = None;
for _ in 0..10 {
let item_started: JSONRPCNotification = timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("item/started"),
)
.await??;
let started: ItemStartedNotification =
serde_json::from_value(item_started.params.expect("params must be present"))?;
if let ThreadItem::CommandExecution { id, .. } = started.item {
command_item_id = Some(id);
break;
}
}
let command_item_id = command_item_id.expect("did not observe command execution item");
assert_eq!(command_item_id, params.item_id);
mcp.send_response(
request_id,
serde_json::json!({ "decision": codex_core::protocol::ReviewDecision::Approved }),
)
.await?;
timeout(
DEFAULT_READ_TIMEOUT,
mcp.read_stream_until_notification_message("turn/completed"),
)
.await??;
Ok(())
}
#[tokio::test]
async fn review_start_rejects_empty_base_branch() -> Result<()> {
let server = create_mock_responses_server_repeating_assistant("Done").await;
@ -299,13 +388,21 @@ async fn start_default_thread(mcp: &mut McpProcess) -> Result<String> {
}
fn create_config_toml(codex_home: &std::path::Path, server_uri: &str) -> std::io::Result<()> {
create_config_toml_with_approval_policy(codex_home, server_uri, "never")
}
fn create_config_toml_with_approval_policy(
codex_home: &std::path::Path,
server_uri: &str,
approval_policy: &str,
) -> std::io::Result<()> {
let config_toml = codex_home.join("config.toml");
std::fs::write(
config_toml,
format!(
r#"
model = "mock-model"
approval_policy = "never"
approval_policy = "{approval_policy}"
sandbox_mode = "read-only"
model_provider = "mock_provider"

View file

@ -312,14 +312,22 @@ async fn handle_exec_approval(
event: ExecApprovalRequestEvent,
cancel_token: &CancellationToken,
) {
let ExecApprovalRequestEvent {
call_id,
command,
cwd,
reason,
proposed_execpolicy_amendment,
..
} = event;
// Race approval with cancellation and timeout to avoid hangs.
let approval_fut = parent_session.request_command_approval(
parent_ctx,
parent_ctx.sub_id.clone(),
event.command,
event.cwd,
event.reason,
event.proposed_execpolicy_amendment,
call_id,
command,
cwd,
reason,
proposed_execpolicy_amendment,
);
let decision = await_approval_with_cancel(
approval_fut,
@ -341,14 +349,15 @@ async fn handle_patch_approval(
event: ApplyPatchApprovalRequestEvent,
cancel_token: &CancellationToken,
) {
let ApplyPatchApprovalRequestEvent {
call_id,
changes,
reason,
grant_root,
..
} = event;
let decision_rx = parent_session
.request_patch_approval(
parent_ctx,
parent_ctx.sub_id.clone(),
event.changes,
event.reason,
event.grant_root,
)
.request_patch_approval(parent_ctx, call_id, changes, reason, grant_root)
.await;
let decision = await_approval_with_cancel(
async move { decision_rx.await.unwrap_or_default() },