Propagate tool errors to code mode (#15075)
Clean up error flow to push the FunctionCallError all the way up to dispatcher and allow code mode to surface as exception.
This commit is contained in:
parent
392347d436
commit
88e5382fc4
10 changed files with 153 additions and 138 deletions
|
|
@ -4557,7 +4557,7 @@ async fn fatal_tool_error_stops_turn_and_reports_error() {
|
|||
.expect("tool call present");
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let err = router
|
||||
.dispatch_tool_call(
|
||||
.dispatch_tool_call_with_code_mode_result(
|
||||
Arc::clone(&session),
|
||||
Arc::clone(&turn_context),
|
||||
tracker,
|
||||
|
|
@ -4565,7 +4565,8 @@ async fn fatal_tool_error_stops_turn_and_reports_error() {
|
|||
ToolCallSource::Direct,
|
||||
)
|
||||
.await
|
||||
.expect_err("expected fatal error");
|
||||
.err()
|
||||
.expect("expected fatal error");
|
||||
|
||||
match err {
|
||||
FunctionCallError::Fatal(message) => {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ use serde_json::Value as JsonValue;
|
|||
use crate::client_common::tools::ToolSpec;
|
||||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::ToolRouter;
|
||||
use crate::tools::code_mode_description::augment_tool_spec_for_code_mode;
|
||||
use crate::tools::code_mode_description::code_mode_tool_reference;
|
||||
|
|
@ -303,9 +304,11 @@ async fn call_nested_tool(
|
|||
tool_name: String,
|
||||
input: Option<JsonValue>,
|
||||
cancellation_token: tokio_util::sync::CancellationToken,
|
||||
) -> JsonValue {
|
||||
) -> Result<JsonValue, FunctionCallError> {
|
||||
if tool_name == PUBLIC_TOOL_NAME {
|
||||
return JsonValue::String(format!("{PUBLIC_TOOL_NAME} cannot invoke itself"));
|
||||
return Err(FunctionCallError::RespondToModel(format!(
|
||||
"{PUBLIC_TOOL_NAME} cannot invoke itself"
|
||||
)));
|
||||
}
|
||||
|
||||
let payload =
|
||||
|
|
@ -316,12 +319,12 @@ async fn call_nested_tool(
|
|||
tool,
|
||||
raw_arguments,
|
||||
},
|
||||
Err(error) => return JsonValue::String(error),
|
||||
Err(error) => return Err(FunctionCallError::RespondToModel(error)),
|
||||
}
|
||||
} else {
|
||||
match build_nested_tool_payload(tool_runtime.find_spec(&tool_name), &tool_name, input) {
|
||||
Ok(payload) => payload,
|
||||
Err(error) => return JsonValue::String(error),
|
||||
Err(error) => return Err(FunctionCallError::RespondToModel(error)),
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -333,12 +336,8 @@ async fn call_nested_tool(
|
|||
};
|
||||
let result = tool_runtime
|
||||
.handle_tool_call_with_source(call, ToolCallSource::CodeMode, cancellation_token)
|
||||
.await;
|
||||
|
||||
match result {
|
||||
Ok(result) => result.code_mode_result(),
|
||||
Err(error) => JsonValue::String(error.to_string()),
|
||||
}
|
||||
.await?;
|
||||
Ok(result.code_mode_result())
|
||||
}
|
||||
|
||||
fn tool_kind_for_spec(spec: &ToolSpec) -> protocol::CodeModeToolKind {
|
||||
|
|
|
|||
|
|
@ -70,6 +70,8 @@ pub(super) enum HostToNodeMessage {
|
|||
request_id: String,
|
||||
id: String,
|
||||
code_mode_result: JsonValue,
|
||||
#[serde(default)]
|
||||
error_text: Option<String>,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -595,6 +595,10 @@ function createProtocol() {
|
|||
return;
|
||||
}
|
||||
pending.delete(message.request_id + ':' + message.id);
|
||||
if (typeof message.error_text === 'string') {
|
||||
entry.reject(new Error(message.error_text));
|
||||
return;
|
||||
}
|
||||
entry.resolve(message.code_mode_result ?? '');
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ use super::process::write_message;
|
|||
use super::protocol::HostToNodeMessage;
|
||||
use super::protocol::NodeToHostMessage;
|
||||
use crate::tools::parallel::ToolCallRuntime;
|
||||
|
||||
pub(crate) struct CodeModeWorker {
|
||||
shutdown_tx: Option<oneshot::Sender<()>>,
|
||||
}
|
||||
|
|
@ -53,17 +54,23 @@ impl CodeModeProcess {
|
|||
let tool_runtime = tool_runtime.clone();
|
||||
let stdin = stdin.clone();
|
||||
tokio::spawn(async move {
|
||||
let result = call_nested_tool(
|
||||
exec,
|
||||
tool_runtime,
|
||||
tool_call.name,
|
||||
tool_call.input,
|
||||
CancellationToken::new(),
|
||||
)
|
||||
.await;
|
||||
let (code_mode_result, error_text) = match result {
|
||||
Ok(code_mode_result) => (code_mode_result, None),
|
||||
Err(error) => (serde_json::Value::Null, Some(error.to_string())),
|
||||
};
|
||||
let response = HostToNodeMessage::Response {
|
||||
request_id: tool_call.request_id,
|
||||
id: tool_call.id,
|
||||
code_mode_result: call_nested_tool(
|
||||
exec,
|
||||
tool_runtime,
|
||||
tool_call.name,
|
||||
tool_call.input,
|
||||
CancellationToken::new(),
|
||||
)
|
||||
.await,
|
||||
code_mode_result,
|
||||
error_text,
|
||||
};
|
||||
if let Err(err) = write_message(&stdin, &response).await {
|
||||
warn!("failed to write {PUBLIC_TOOL_NAME} tool response: {err}");
|
||||
|
|
|
|||
|
|
@ -1607,8 +1607,8 @@ impl JsReplManager {
|
|||
let tracker = Arc::clone(&exec.tracker);
|
||||
|
||||
match router
|
||||
.dispatch_tool_call(
|
||||
session.clone(),
|
||||
.dispatch_tool_call_with_code_mode_result(
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call,
|
||||
|
|
@ -1616,7 +1616,8 @@ impl JsReplManager {
|
|||
)
|
||||
.await
|
||||
{
|
||||
Ok(response) => {
|
||||
Ok(result) => {
|
||||
let response = result.into_response();
|
||||
let summary = Self::summarize_tool_call_response(&response);
|
||||
match serde_json::to_value(response) {
|
||||
Ok(value) => {
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ use crate::error::CodexErr;
|
|||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::AbortedToolOutput;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::registry::AnyToolResult;
|
||||
use crate::tools::router::ToolCall;
|
||||
use crate::tools::router::ToolCallSource;
|
||||
|
|
@ -57,9 +58,17 @@ impl ToolCallRuntime {
|
|||
call: ToolCall,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> impl std::future::Future<Output = Result<ResponseInputItem, CodexErr>> {
|
||||
let error_call = call.clone();
|
||||
let future =
|
||||
self.handle_tool_call_with_source(call, ToolCallSource::Direct, cancellation_token);
|
||||
async move { future.await.map(AnyToolResult::into_response) }.in_current_span()
|
||||
async move {
|
||||
match future.await {
|
||||
Ok(response) => Ok(response.into_response()),
|
||||
Err(FunctionCallError::Fatal(message)) => Err(CodexErr::Fatal(message)),
|
||||
Err(other) => Ok(Self::failure_response(error_call, other)),
|
||||
}
|
||||
}
|
||||
.in_current_span()
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", skip_all)]
|
||||
|
|
@ -68,7 +77,7 @@ impl ToolCallRuntime {
|
|||
call: ToolCall,
|
||||
source: ToolCallSource,
|
||||
cancellation_token: CancellationToken,
|
||||
) -> impl std::future::Future<Output = Result<AnyToolResult, CodexErr>> {
|
||||
) -> impl std::future::Future<Output = Result<AnyToolResult, FunctionCallError>> {
|
||||
let supports_parallel = self.router.tool_supports_parallel(&call.tool_name);
|
||||
let router = Arc::clone(&self.router);
|
||||
let session = Arc::clone(&self.session);
|
||||
|
|
@ -78,7 +87,7 @@ impl ToolCallRuntime {
|
|||
let started = Instant::now();
|
||||
|
||||
let dispatch_span = trace_span!(
|
||||
"dispatch_tool_call",
|
||||
"dispatch_tool_call_with_code_mode_result",
|
||||
otel.name = call.tool_name.as_str(),
|
||||
tool_name = call.tool_name.as_str(),
|
||||
call_id = call.call_id.as_str(),
|
||||
|
|
@ -115,20 +124,42 @@ impl ToolCallRuntime {
|
|||
}));
|
||||
|
||||
async move {
|
||||
match handle.await {
|
||||
Ok(Ok(response)) => Ok(response),
|
||||
Ok(Err(FunctionCallError::Fatal(message))) => Err(CodexErr::Fatal(message)),
|
||||
Ok(Err(other)) => Err(CodexErr::Fatal(other.to_string())),
|
||||
Err(err) => Err(CodexErr::Fatal(format!(
|
||||
"tool task failed to receive: {err:?}"
|
||||
))),
|
||||
}
|
||||
handle.await.map_err(|err| {
|
||||
FunctionCallError::Fatal(format!("tool task failed to receive: {err:?}"))
|
||||
})?
|
||||
}
|
||||
.in_current_span()
|
||||
}
|
||||
}
|
||||
|
||||
impl ToolCallRuntime {
|
||||
fn failure_response(call: ToolCall, err: FunctionCallError) -> ResponseInputItem {
|
||||
let message = err.to_string();
|
||||
match call.payload {
|
||||
ToolPayload::ToolSearch { .. } => ResponseInputItem::ToolSearchOutput {
|
||||
call_id: call.call_id,
|
||||
status: "completed".to_string(),
|
||||
execution: "client".to_string(),
|
||||
tools: Vec::new(),
|
||||
},
|
||||
ToolPayload::Custom { .. } => ResponseInputItem::CustomToolCallOutput {
|
||||
call_id: call.call_id,
|
||||
name: None,
|
||||
output: codex_protocol::models::FunctionCallOutputPayload {
|
||||
body: codex_protocol::models::FunctionCallOutputBody::Text(message),
|
||||
success: Some(false),
|
||||
},
|
||||
},
|
||||
_ => ResponseInputItem::FunctionCallOutput {
|
||||
call_id: call.call_id,
|
||||
output: codex_protocol::models::FunctionCallOutputPayload {
|
||||
body: codex_protocol::models::FunctionCallOutputBody::Text(message),
|
||||
success: Some(false),
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn aborted_response(call: &ToolCall, secs: f32) -> AnyToolResult {
|
||||
AnyToolResult {
|
||||
call_id: call.call_id.clone(),
|
||||
|
|
|
|||
|
|
@ -5,11 +5,9 @@ use crate::function_tool::FunctionCallError;
|
|||
use crate::mcp_connection_manager::ToolInfo;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::tools::code_mode::is_code_mode_nested_tool;
|
||||
use crate::tools::context::FunctionToolOutput;
|
||||
use crate::tools::context::SharedTurnDiffTracker;
|
||||
use crate::tools::context::ToolInvocation;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::tools::context::ToolSearchOutput;
|
||||
use crate::tools::discoverable::DiscoverableTool;
|
||||
use crate::tools::registry::AnyToolResult;
|
||||
use crate::tools::registry::ConfiguredToolSpec;
|
||||
|
|
@ -18,7 +16,6 @@ use crate::tools::spec::ToolsConfig;
|
|||
use crate::tools::spec::build_specs_with_discoverable_tools;
|
||||
use codex_protocol::dynamic_tools::DynamicToolSpec;
|
||||
use codex_protocol::models::LocalShellAction;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::models::SearchToolCallParams;
|
||||
use codex_protocol::models::ShellToolCallParams;
|
||||
|
|
@ -214,21 +211,6 @@ impl ToolRouter {
|
|||
}
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", skip_all, err)]
|
||||
pub async fn dispatch_tool_call(
|
||||
&self,
|
||||
session: Arc<Session>,
|
||||
turn: Arc<TurnContext>,
|
||||
tracker: SharedTurnDiffTracker,
|
||||
call: ToolCall,
|
||||
source: ToolCallSource,
|
||||
) -> Result<ResponseInputItem, FunctionCallError> {
|
||||
Ok(self
|
||||
.dispatch_tool_call_with_code_mode_result(session, turn, tracker, call, source)
|
||||
.await?
|
||||
.into_response())
|
||||
}
|
||||
|
||||
#[instrument(level = "trace", skip_all, err)]
|
||||
pub async fn dispatch_tool_call_with_code_mode_result(
|
||||
&self,
|
||||
|
|
@ -244,23 +226,14 @@ impl ToolRouter {
|
|||
call_id,
|
||||
payload,
|
||||
} = call;
|
||||
let payload_outputs_custom = matches!(payload, ToolPayload::Custom { .. });
|
||||
let payload_outputs_tool_search = matches!(payload, ToolPayload::ToolSearch { .. });
|
||||
let failure_call_id = call_id.clone();
|
||||
|
||||
if source == ToolCallSource::Direct
|
||||
&& turn.tools_config.js_repl_tools_only
|
||||
&& !matches!(tool_name.as_str(), "js_repl" | "js_repl_reset")
|
||||
{
|
||||
let err = FunctionCallError::RespondToModel(
|
||||
return Err(FunctionCallError::RespondToModel(
|
||||
"direct tool calls are disabled; use js_repl and codex.tool(...) instead"
|
||||
.to_string(),
|
||||
);
|
||||
return Ok(Self::failure_result(
|
||||
failure_call_id,
|
||||
payload_outputs_custom,
|
||||
payload_outputs_tool_search,
|
||||
err,
|
||||
));
|
||||
}
|
||||
|
||||
|
|
@ -274,53 +247,7 @@ impl ToolRouter {
|
|||
payload,
|
||||
};
|
||||
|
||||
match self.registry.dispatch_any(invocation).await {
|
||||
Ok(response) => Ok(response),
|
||||
Err(FunctionCallError::Fatal(message)) => Err(FunctionCallError::Fatal(message)),
|
||||
Err(err) => Ok(Self::failure_result(
|
||||
failure_call_id,
|
||||
payload_outputs_custom,
|
||||
payload_outputs_tool_search,
|
||||
err,
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
fn failure_result(
|
||||
call_id: String,
|
||||
payload_outputs_custom: bool,
|
||||
payload_outputs_tool_search: bool,
|
||||
err: FunctionCallError,
|
||||
) -> AnyToolResult {
|
||||
let message = err.to_string();
|
||||
if payload_outputs_tool_search {
|
||||
AnyToolResult {
|
||||
call_id,
|
||||
payload: ToolPayload::ToolSearch {
|
||||
arguments: SearchToolCallParams {
|
||||
query: String::new(),
|
||||
limit: None,
|
||||
},
|
||||
},
|
||||
result: Box::new(ToolSearchOutput { tools: Vec::new() }),
|
||||
}
|
||||
} else if payload_outputs_custom {
|
||||
AnyToolResult {
|
||||
call_id,
|
||||
payload: ToolPayload::Custom {
|
||||
input: String::new(),
|
||||
},
|
||||
result: Box::new(FunctionToolOutput::from_text(message, Some(false))),
|
||||
}
|
||||
} else {
|
||||
AnyToolResult {
|
||||
call_id,
|
||||
payload: ToolPayload::Function {
|
||||
arguments: "{}".to_string(),
|
||||
},
|
||||
result: Box::new(FunctionToolOutput::from_text(message, Some(false))),
|
||||
}
|
||||
}
|
||||
self.registry.dispatch_any(invocation).await
|
||||
}
|
||||
}
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use crate::codex::make_session_and_context;
|
||||
use crate::function_tool::FunctionCallError;
|
||||
use crate::tools::context::ToolPayload;
|
||||
use crate::turn_diff_tracker::TurnDiffTracker;
|
||||
use codex_protocol::models::ResponseInputItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
|
||||
use super::ToolCall;
|
||||
|
|
@ -50,20 +50,21 @@ async fn js_repl_tools_only_blocks_direct_tool_calls() -> anyhow::Result<()> {
|
|||
},
|
||||
};
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let response = router
|
||||
.dispatch_tool_call(session, turn, tracker, call, ToolCallSource::Direct)
|
||||
.await?;
|
||||
|
||||
match response {
|
||||
ResponseInputItem::FunctionCallOutput { output, .. } => {
|
||||
let content = output.text_content().unwrap_or_default();
|
||||
assert!(
|
||||
content.contains("direct tool calls are disabled"),
|
||||
"unexpected tool call message: {content}",
|
||||
);
|
||||
}
|
||||
other => panic!("expected function call output, got {other:?}"),
|
||||
}
|
||||
let err = router
|
||||
.dispatch_tool_call_with_code_mode_result(
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call,
|
||||
ToolCallSource::Direct,
|
||||
)
|
||||
.await
|
||||
.err()
|
||||
.expect("direct tool calls should be blocked");
|
||||
let FunctionCallError::RespondToModel(message) = err else {
|
||||
panic!("expected RespondToModel, got {err:?}");
|
||||
};
|
||||
assert!(message.contains("direct tool calls are disabled"));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
@ -107,20 +108,22 @@ async fn js_repl_tools_only_allows_js_repl_source_calls() -> anyhow::Result<()>
|
|||
},
|
||||
};
|
||||
let tracker = Arc::new(tokio::sync::Mutex::new(TurnDiffTracker::new()));
|
||||
let response = router
|
||||
.dispatch_tool_call(session, turn, tracker, call, ToolCallSource::JsRepl)
|
||||
.await?;
|
||||
|
||||
match response {
|
||||
ResponseInputItem::FunctionCallOutput { output, .. } => {
|
||||
let content = output.text_content().unwrap_or_default();
|
||||
assert!(
|
||||
!content.contains("direct tool calls are disabled"),
|
||||
"js_repl source should bypass direct-call policy gate"
|
||||
);
|
||||
}
|
||||
other => panic!("expected function call output, got {other:?}"),
|
||||
}
|
||||
let err = router
|
||||
.dispatch_tool_call_with_code_mode_result(
|
||||
session,
|
||||
turn,
|
||||
tracker,
|
||||
call,
|
||||
ToolCallSource::JsRepl,
|
||||
)
|
||||
.await
|
||||
.err()
|
||||
.expect("shell call with empty args should fail");
|
||||
let message = err.to_string();
|
||||
assert!(
|
||||
!message.contains("direct tool calls are disabled"),
|
||||
"js_repl source should bypass direct-call policy gate"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
|
|
@ -537,6 +537,46 @@ Error:\ boom\n
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_exec_surfaces_handler_errors_as_exceptions() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let server = responses::start_mock_server().await;
|
||||
let (_test, second_mock) = run_code_mode_turn(
|
||||
&server,
|
||||
"surface nested tool handler failures as script exceptions",
|
||||
r#"
|
||||
try {
|
||||
await tools.exec_command({});
|
||||
text("no-exception");
|
||||
} catch (error) {
|
||||
text(`caught:${error?.message ?? String(error)}`);
|
||||
}
|
||||
"#,
|
||||
false,
|
||||
)
|
||||
.await?;
|
||||
|
||||
let request = second_mock.single_request();
|
||||
let (output, success) = custom_tool_output_body_and_success(&request, "call-1");
|
||||
assert_ne!(
|
||||
success,
|
||||
Some(false),
|
||||
"script should catch the nested tool error: {output}"
|
||||
);
|
||||
assert!(
|
||||
output.contains("caught:"),
|
||||
"expected caught exception text in output: {output}"
|
||||
);
|
||||
assert!(
|
||||
!output.contains("no-exception"),
|
||||
"nested tool error should not allow success path: {output}"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg_attr(windows, ignore = "no exec_command on Windows")]
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn code_mode_can_yield_and_resume_with_wait() -> Result<()> {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue