Removed experimental "command risk assessment" feature (#7799)
This experimental feature received lukewarm reception during internal testing. Removing from the code base.
This commit is contained in:
parent
e0fb3ca1db
commit
c4af707e09
40 changed files with 13 additions and 703 deletions
52
codex-rs/Cargo.lock
generated
52
codex-rs/Cargo.lock
generated
|
|
@ -238,48 +238,6 @@ dependencies = [
|
|||
"term",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "askama"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f75363874b771be265f4ffe307ca705ef6f3baa19011c149da8674a87f1b75c4"
|
||||
dependencies = [
|
||||
"askama_derive",
|
||||
"itoa",
|
||||
"percent-encoding",
|
||||
"serde",
|
||||
"serde_json",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "askama_derive"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "129397200fe83088e8a68407a8e2b1f826cf0086b21ccdb866a722c8bcd3a94f"
|
||||
dependencies = [
|
||||
"askama_parser",
|
||||
"basic-toml",
|
||||
"memchr",
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"rustc-hash",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"syn 2.0.104",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "askama_parser"
|
||||
version = "0.14.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d6ab5630b3d5eaf232620167977f95eb51f3432fc76852328774afbd242d4358"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"winnow",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "assert-json-diff"
|
||||
version = "2.0.2"
|
||||
|
|
@ -557,15 +515,6 @@ version = "0.22.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
|
||||
|
||||
[[package]]
|
||||
name = "basic-toml"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ba62675e8242a4c4e806d12f11d136e626e6c8361d6b829310732241652a178a"
|
||||
dependencies = [
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "beef"
|
||||
version = "0.5.2"
|
||||
|
|
@ -1137,7 +1086,6 @@ name = "codex-core"
|
|||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"anyhow",
|
||||
"askama",
|
||||
"assert_cmd",
|
||||
"assert_matches",
|
||||
"async-channel",
|
||||
|
|
|
|||
|
|
@ -109,7 +109,6 @@ allocative = "0.3.3"
|
|||
ansi-to-tui = "7.0.0"
|
||||
anyhow = "1"
|
||||
arboard = { version = "3", features = ["wayland-data-control"] }
|
||||
askama = "0.14"
|
||||
assert_cmd = "2"
|
||||
assert_matches = "1.5.0"
|
||||
async-channel = "2.3.1"
|
||||
|
|
|
|||
|
|
@ -654,7 +654,6 @@ mod tests {
|
|||
command: vec!["echo".to_string(), "hello".to_string()],
|
||||
cwd: PathBuf::from("/tmp"),
|
||||
reason: Some("because tests".to_string()),
|
||||
risk: None,
|
||||
parsed_cmd: vec![ParsedCommand::Unknown {
|
||||
cmd: "echo hello".to_string(),
|
||||
}],
|
||||
|
|
@ -674,7 +673,6 @@ mod tests {
|
|||
"command": ["echo", "hello"],
|
||||
"cwd": "/tmp",
|
||||
"reason": "because tests",
|
||||
"risk": null,
|
||||
"parsedCmd": [
|
||||
{
|
||||
"type": "unknown",
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@ use codex_protocol::protocol::AskForApproval;
|
|||
use codex_protocol::protocol::EventMsg;
|
||||
use codex_protocol::protocol::FileChange;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use codex_protocol::protocol::TurnAbortReason;
|
||||
|
|
@ -226,7 +225,6 @@ pub struct ExecCommandApprovalParams {
|
|||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
pub reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
pub parsed_cmd: Vec<ParsedCommand>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ use std::path::PathBuf;
|
|||
use crate::protocol::common::AuthMode;
|
||||
use codex_protocol::account::PlanType;
|
||||
use codex_protocol::approvals::ExecPolicyAmendment as CoreExecPolicyAmendment;
|
||||
use codex_protocol::approvals::SandboxCommandAssessment as CoreSandboxCommandAssessment;
|
||||
use codex_protocol::config_types::ReasoningSummary;
|
||||
use codex_protocol::items::AgentMessageContent as CoreAgentMessageContent;
|
||||
use codex_protocol::items::TurnItem as CoreTurnItem;
|
||||
|
|
@ -275,14 +274,6 @@ pub struct ConfigEdit {
|
|||
pub merge_strategy: MergeStrategy,
|
||||
}
|
||||
|
||||
v2_enum_from_core!(
|
||||
pub enum CommandRiskLevel from codex_protocol::approvals::SandboxRiskLevel {
|
||||
Low,
|
||||
Medium,
|
||||
High
|
||||
}
|
||||
);
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
|
|
@ -362,32 +353,6 @@ impl From<codex_protocol::protocol::SandboxPolicy> for SandboxPolicy {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, JsonSchema, TS)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
#[ts(export_to = "v2/")]
|
||||
pub struct SandboxCommandAssessment {
|
||||
pub description: String,
|
||||
pub risk_level: CommandRiskLevel,
|
||||
}
|
||||
|
||||
impl SandboxCommandAssessment {
|
||||
pub fn into_core(self) -> CoreSandboxCommandAssessment {
|
||||
CoreSandboxCommandAssessment {
|
||||
description: self.description,
|
||||
risk_level: self.risk_level.to_core(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CoreSandboxCommandAssessment> for SandboxCommandAssessment {
|
||||
fn from(value: CoreSandboxCommandAssessment) -> Self {
|
||||
Self {
|
||||
description: value.description,
|
||||
risk_level: CommandRiskLevel::from(value.risk_level),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq, JsonSchema, TS)]
|
||||
#[serde(transparent)]
|
||||
#[ts(type = "Array<string>", export_to = "v2/")]
|
||||
|
|
@ -1535,8 +1500,6 @@ pub struct CommandExecutionRequestApprovalParams {
|
|||
pub item_id: String,
|
||||
/// Optional explanatory reason (e.g. request for network access).
|
||||
pub reason: Option<String>,
|
||||
/// Optional model-provided risk assessment describing the blocked command.
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
/// Optional proposed execpolicy amendment to allow similar commands without prompting.
|
||||
pub proposed_execpolicy_amendment: Option<ExecPolicyAmendment>,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -756,7 +756,6 @@ impl CodexClient {
|
|||
turn_id,
|
||||
item_id,
|
||||
reason,
|
||||
risk,
|
||||
proposed_execpolicy_amendment,
|
||||
} = params;
|
||||
|
||||
|
|
@ -766,9 +765,6 @@ impl CodexClient {
|
|||
if let Some(reason) = reason.as_deref() {
|
||||
println!("< reason: {reason}");
|
||||
}
|
||||
if let Some(risk) = risk.as_ref() {
|
||||
println!("< risk assessment: {risk:?}");
|
||||
}
|
||||
if let Some(execpolicy_amendment) = proposed_execpolicy_amendment.as_ref() {
|
||||
println!("< proposed execpolicy amendment: {execpolicy_amendment:?}");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,7 +34,6 @@ use codex_app_server_protocol::PatchChangeKind as V2PatchChangeKind;
|
|||
use codex_app_server_protocol::ReasoningSummaryPartAddedNotification;
|
||||
use codex_app_server_protocol::ReasoningSummaryTextDeltaNotification;
|
||||
use codex_app_server_protocol::ReasoningTextDeltaNotification;
|
||||
use codex_app_server_protocol::SandboxCommandAssessment as V2SandboxCommandAssessment;
|
||||
use codex_app_server_protocol::ServerNotification;
|
||||
use codex_app_server_protocol::ServerRequestPayload;
|
||||
use codex_app_server_protocol::TerminalInteractionNotification;
|
||||
|
|
@ -180,7 +179,6 @@ pub(crate) async fn apply_bespoke_event_handling(
|
|||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
proposed_execpolicy_amendment,
|
||||
parsed_cmd,
|
||||
}) => match api_version {
|
||||
|
|
@ -191,7 +189,6 @@ pub(crate) async fn apply_bespoke_event_handling(
|
|||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
parsed_cmd,
|
||||
};
|
||||
let rx = outgoing
|
||||
|
|
@ -219,7 +216,6 @@ pub(crate) async fn apply_bespoke_event_handling(
|
|||
// and emit the corresponding EventMsg, we repurpose the call_id as the item_id.
|
||||
item_id: item_id.clone(),
|
||||
reason,
|
||||
risk: risk.map(V2SandboxCommandAssessment::from),
|
||||
proposed_execpolicy_amendment: proposed_execpolicy_amendment_v2,
|
||||
};
|
||||
let rx = outgoing
|
||||
|
|
@ -1214,7 +1210,7 @@ async fn construct_mcp_tool_call_notification(
|
|||
}
|
||||
}
|
||||
|
||||
/// simiilar to handle_mcp_tool_call_end in exec
|
||||
/// similar to handle_mcp_tool_call_end in exec
|
||||
async fn construct_mcp_tool_call_end_notification(
|
||||
end_event: McpToolCallEndEvent,
|
||||
thread_id: String,
|
||||
|
|
|
|||
|
|
@ -271,7 +271,6 @@ async fn test_send_user_turn_changes_approval_policy_behavior() -> Result<()> {
|
|||
command: format_with_current_shell("python3 -c 'print(42)'"),
|
||||
cwd: working_directory.clone(),
|
||||
reason: None,
|
||||
risk: None,
|
||||
parsed_cmd: vec![ParsedCommand::Unknown {
|
||||
cmd: "python3 -c 'print(42)'".to_string()
|
||||
}],
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ workspace = true
|
|||
|
||||
[dependencies]
|
||||
anyhow = { workspace = true }
|
||||
askama = { workspace = true }
|
||||
async-channel = { workspace = true }
|
||||
async-trait = { workspace = true }
|
||||
base64 = { workspace = true }
|
||||
|
|
|
|||
|
|
@ -95,7 +95,6 @@ use crate::protocol::RateLimitSnapshot;
|
|||
use crate::protocol::ReasoningContentDeltaEvent;
|
||||
use crate::protocol::ReasoningRawContentDeltaEvent;
|
||||
use crate::protocol::ReviewDecision;
|
||||
use crate::protocol::SandboxCommandAssessment;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::protocol::SessionConfiguredEvent;
|
||||
use crate::protocol::StreamErrorEvent;
|
||||
|
|
@ -875,34 +874,6 @@ impl Session {
|
|||
.await;
|
||||
}
|
||||
|
||||
pub(crate) async fn assess_sandbox_command(
|
||||
&self,
|
||||
turn_context: &TurnContext,
|
||||
call_id: &str,
|
||||
command: &[String],
|
||||
failure_message: Option<&str>,
|
||||
) -> Option<SandboxCommandAssessment> {
|
||||
let config = turn_context.client.config();
|
||||
let provider = turn_context.client.provider().clone();
|
||||
let auth_manager = Arc::clone(&self.services.auth_manager);
|
||||
let otel = self.services.otel_event_manager.clone();
|
||||
crate::sandboxing::assessment::assess_command(
|
||||
config,
|
||||
provider,
|
||||
auth_manager,
|
||||
&otel,
|
||||
self.conversation_id,
|
||||
self.services.models_manager.clone(),
|
||||
turn_context.client.get_session_source(),
|
||||
call_id,
|
||||
command,
|
||||
&turn_context.sandbox_policy,
|
||||
&turn_context.cwd,
|
||||
failure_message,
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Adds an execpolicy amendment to both the in-memory and on-disk policies so future
|
||||
/// commands can use the newly approved prefix.
|
||||
pub(crate) async fn persist_execpolicy_amendment(
|
||||
|
|
@ -950,7 +921,6 @@ impl Session {
|
|||
command: Vec<String>,
|
||||
cwd: PathBuf,
|
||||
reason: Option<String>,
|
||||
risk: Option<SandboxCommandAssessment>,
|
||||
proposed_execpolicy_amendment: Option<ExecPolicyAmendment>,
|
||||
) -> ReviewDecision {
|
||||
let sub_id = turn_context.sub_id.clone();
|
||||
|
|
@ -978,7 +948,6 @@ impl Session {
|
|||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
proposed_execpolicy_amendment,
|
||||
parsed_cmd,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -280,7 +280,6 @@ async fn handle_exec_approval(
|
|||
event.command,
|
||||
event.cwd,
|
||||
event.reason,
|
||||
event.risk,
|
||||
event.proposed_execpolicy_amendment,
|
||||
);
|
||||
let decision = await_approval_with_cancel(
|
||||
|
|
|
|||
|
|
@ -246,9 +246,6 @@ pub struct Config {
|
|||
|
||||
pub tools_web_search_request: bool,
|
||||
|
||||
/// When `true`, run a model-based assessment for commands denied by the sandbox.
|
||||
pub experimental_sandbox_command_assessment: bool,
|
||||
|
||||
/// If set to `true`, used only the experimental unified exec tool.
|
||||
pub use_experimental_unified_exec_tool: bool,
|
||||
|
||||
|
|
@ -733,7 +730,6 @@ pub struct ConfigToml {
|
|||
pub experimental_use_unified_exec_tool: Option<bool>,
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
/// Preferred OSS provider for local models, e.g. "lmstudio" or "ollama".
|
||||
pub oss_provider: Option<String>,
|
||||
}
|
||||
|
|
@ -919,7 +915,6 @@ pub struct ConfigOverrides {
|
|||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub show_raw_agent_reasoning: Option<bool>,
|
||||
pub tools_web_search_request: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
/// Additional directories that should be treated as writable roots for this session.
|
||||
pub additional_writable_roots: Vec<PathBuf>,
|
||||
}
|
||||
|
|
@ -978,7 +973,6 @@ impl Config {
|
|||
include_apply_patch_tool: include_apply_patch_tool_override,
|
||||
show_raw_agent_reasoning,
|
||||
tools_web_search_request: override_tools_web_search_request,
|
||||
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
||||
additional_writable_roots,
|
||||
} = overrides;
|
||||
|
||||
|
|
@ -1003,7 +997,6 @@ impl Config {
|
|||
let feature_overrides = FeatureOverrides {
|
||||
include_apply_patch_tool: include_apply_patch_tool_override,
|
||||
web_search_request: override_tools_web_search_request,
|
||||
experimental_sandbox_command_assessment: sandbox_command_assessment_override,
|
||||
};
|
||||
|
||||
let features = Features::from_config(&cfg, &config_profile, feature_overrides);
|
||||
|
|
@ -1102,8 +1095,6 @@ impl Config {
|
|||
let tools_web_search_request = features.enabled(Feature::WebSearchRequest);
|
||||
let use_experimental_unified_exec_tool = features.enabled(Feature::UnifiedExec);
|
||||
let use_experimental_use_rmcp_client = features.enabled(Feature::RmcpClient);
|
||||
let experimental_sandbox_command_assessment =
|
||||
features.enabled(Feature::SandboxCommandAssessment);
|
||||
|
||||
let forced_chatgpt_workspace_id =
|
||||
cfg.forced_chatgpt_workspace_id.as_ref().and_then(|value| {
|
||||
|
|
@ -1234,7 +1225,6 @@ impl Config {
|
|||
forced_login_method,
|
||||
include_apply_patch_tool: include_apply_patch_tool_flag,
|
||||
tools_web_search_request,
|
||||
experimental_sandbox_command_assessment,
|
||||
use_experimental_unified_exec_tool,
|
||||
use_experimental_use_rmcp_client,
|
||||
features,
|
||||
|
|
@ -2990,7 +2980,6 @@ model_verbosity = "high"
|
|||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
features: Features::with_defaults(),
|
||||
|
|
@ -3065,7 +3054,6 @@ model_verbosity = "high"
|
|||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
features: Features::with_defaults(),
|
||||
|
|
@ -3155,7 +3143,6 @@ model_verbosity = "high"
|
|||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
features: Features::with_defaults(),
|
||||
|
|
@ -3231,7 +3218,6 @@ model_verbosity = "high"
|
|||
forced_login_method: None,
|
||||
include_apply_patch_tool: false,
|
||||
tools_web_search_request: false,
|
||||
experimental_sandbox_command_assessment: false,
|
||||
use_experimental_unified_exec_tool: false,
|
||||
use_experimental_use_rmcp_client: false,
|
||||
features: Features::with_defaults(),
|
||||
|
|
|
|||
|
|
@ -27,7 +27,6 @@ pub struct ConfigProfile {
|
|||
pub experimental_use_unified_exec_tool: Option<bool>,
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
pub tools_web_search: Option<bool>,
|
||||
pub tools_view_image: Option<bool>,
|
||||
/// Optional feature toggles scoped to this profile.
|
||||
|
|
|
|||
|
|
@ -48,8 +48,6 @@ pub enum Feature {
|
|||
WebSearchRequest,
|
||||
/// Gate the execpolicy enforcement for shell/unified exec.
|
||||
ExecPolicy,
|
||||
/// Enable the model-based risk assessments for sandboxed commands.
|
||||
SandboxCommandAssessment,
|
||||
/// Enable Windows sandbox (restricted token) on Windows.
|
||||
WindowsSandbox,
|
||||
/// Remote compaction enabled (only for ChatGPT auth)
|
||||
|
|
@ -104,7 +102,6 @@ pub struct Features {
|
|||
pub struct FeatureOverrides {
|
||||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub web_search_request: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
}
|
||||
|
||||
impl FeatureOverrides {
|
||||
|
|
@ -196,7 +193,6 @@ impl Features {
|
|||
let mut features = Features::with_defaults();
|
||||
|
||||
let base_legacy = LegacyFeatureToggles {
|
||||
experimental_sandbox_command_assessment: cfg.experimental_sandbox_command_assessment,
|
||||
experimental_use_freeform_apply_patch: cfg.experimental_use_freeform_apply_patch,
|
||||
experimental_use_unified_exec_tool: cfg.experimental_use_unified_exec_tool,
|
||||
experimental_use_rmcp_client: cfg.experimental_use_rmcp_client,
|
||||
|
|
@ -212,8 +208,6 @@ impl Features {
|
|||
|
||||
let profile_legacy = LegacyFeatureToggles {
|
||||
include_apply_patch_tool: config_profile.include_apply_patch_tool,
|
||||
experimental_sandbox_command_assessment: config_profile
|
||||
.experimental_sandbox_command_assessment,
|
||||
experimental_use_freeform_apply_patch: config_profile
|
||||
.experimental_use_freeform_apply_patch,
|
||||
|
||||
|
|
@ -327,12 +321,6 @@ pub const FEATURES: &[FeatureSpec] = &[
|
|||
stage: Stage::Experimental,
|
||||
default_enabled: true,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::SandboxCommandAssessment,
|
||||
key: "experimental_sandbox_command_assessment",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::WindowsSandbox,
|
||||
key: "enable_experimental_windows_sandbox",
|
||||
|
|
|
|||
|
|
@ -9,10 +9,6 @@ struct Alias {
|
|||
}
|
||||
|
||||
const ALIASES: &[Alias] = &[
|
||||
Alias {
|
||||
legacy_key: "experimental_sandbox_command_assessment",
|
||||
feature: Feature::SandboxCommandAssessment,
|
||||
},
|
||||
Alias {
|
||||
legacy_key: "experimental_use_unified_exec_tool",
|
||||
feature: Feature::UnifiedExec,
|
||||
|
|
@ -48,7 +44,6 @@ pub(crate) fn feature_for_key(key: &str) -> Option<Feature> {
|
|||
#[derive(Debug, Default)]
|
||||
pub struct LegacyFeatureToggles {
|
||||
pub include_apply_patch_tool: Option<bool>,
|
||||
pub experimental_sandbox_command_assessment: Option<bool>,
|
||||
pub experimental_use_freeform_apply_patch: Option<bool>,
|
||||
pub experimental_use_unified_exec_tool: Option<bool>,
|
||||
pub experimental_use_rmcp_client: Option<bool>,
|
||||
|
|
@ -64,12 +59,6 @@ impl LegacyFeatureToggles {
|
|||
self.include_apply_patch_tool,
|
||||
"include_apply_patch_tool",
|
||||
);
|
||||
set_if_some(
|
||||
features,
|
||||
Feature::SandboxCommandAssessment,
|
||||
self.experimental_sandbox_command_assessment,
|
||||
"experimental_sandbox_command_assessment",
|
||||
);
|
||||
set_if_some(
|
||||
features,
|
||||
Feature::ApplyPatchFreeform,
|
||||
|
|
|
|||
|
|
@ -1,268 +0,0 @@
|
|||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::time::Duration;
|
||||
use std::time::Instant;
|
||||
|
||||
use crate::AuthManager;
|
||||
use crate::ModelProviderInfo;
|
||||
use crate::client::ModelClient;
|
||||
use crate::client_common::Prompt;
|
||||
use crate::client_common::ResponseEvent;
|
||||
use crate::config::Config;
|
||||
use crate::openai_models::models_manager::ModelsManager;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use askama::Template;
|
||||
use codex_otel::otel_event_manager::OtelEventManager;
|
||||
use codex_protocol::ConversationId;
|
||||
use codex_protocol::models::ContentItem;
|
||||
use codex_protocol::models::ResponseItem;
|
||||
use codex_protocol::openai_models::ReasoningEffort as ReasoningEffortConfig;
|
||||
use codex_protocol::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::protocol::SessionSource;
|
||||
use futures::StreamExt;
|
||||
use serde_json::json;
|
||||
use tokio::time::timeout;
|
||||
use tracing::warn;
|
||||
|
||||
const SANDBOX_ASSESSMENT_TIMEOUT: Duration = Duration::from_secs(15);
|
||||
const SANDBOX_ASSESSMENT_REASONING_EFFORT: ReasoningEffortConfig = ReasoningEffortConfig::Medium;
|
||||
|
||||
#[derive(Template)]
|
||||
#[template(path = "sandboxing/assessment_prompt.md", escape = "none")]
|
||||
struct SandboxAssessmentPromptTemplate<'a> {
|
||||
platform: &'a str,
|
||||
sandbox_policy: &'a str,
|
||||
filesystem_roots: Option<&'a str>,
|
||||
working_directory: &'a str,
|
||||
command_argv: &'a str,
|
||||
command_joined: &'a str,
|
||||
sandbox_failure_message: Option<&'a str>,
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
pub(crate) async fn assess_command(
|
||||
config: Arc<Config>,
|
||||
provider: ModelProviderInfo,
|
||||
auth_manager: Arc<AuthManager>,
|
||||
parent_otel: &OtelEventManager,
|
||||
conversation_id: ConversationId,
|
||||
models_manager: Arc<ModelsManager>,
|
||||
session_source: SessionSource,
|
||||
call_id: &str,
|
||||
command: &[String],
|
||||
sandbox_policy: &SandboxPolicy,
|
||||
cwd: &Path,
|
||||
failure_message: Option<&str>,
|
||||
) -> Option<SandboxCommandAssessment> {
|
||||
if !config.experimental_sandbox_command_assessment || command.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let command_json = serde_json::to_string(command).unwrap_or_else(|_| "[]".to_string());
|
||||
let command_joined =
|
||||
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
|
||||
let failure = failure_message
|
||||
.map(str::trim)
|
||||
.filter(|msg| !msg.is_empty())
|
||||
.map(str::to_string);
|
||||
|
||||
let cwd_str = cwd.to_string_lossy().to_string();
|
||||
let sandbox_summary = summarize_sandbox_policy(sandbox_policy);
|
||||
let mut roots = sandbox_roots_for_prompt(sandbox_policy, cwd);
|
||||
roots.sort();
|
||||
roots.dedup();
|
||||
|
||||
let platform = std::env::consts::OS;
|
||||
let roots_formatted = roots.iter().map(|root| root.to_string_lossy().to_string());
|
||||
let filesystem_roots = match roots_formatted.collect::<Vec<_>>() {
|
||||
collected if collected.is_empty() => None,
|
||||
collected => Some(collected.join(", ")),
|
||||
};
|
||||
|
||||
let prompt_template = SandboxAssessmentPromptTemplate {
|
||||
platform,
|
||||
sandbox_policy: sandbox_summary.as_str(),
|
||||
filesystem_roots: filesystem_roots.as_deref(),
|
||||
working_directory: cwd_str.as_str(),
|
||||
command_argv: command_json.as_str(),
|
||||
command_joined: command_joined.as_str(),
|
||||
sandbox_failure_message: failure.as_deref(),
|
||||
};
|
||||
let rendered_prompt = match prompt_template.render() {
|
||||
Ok(rendered) => rendered,
|
||||
Err(err) => {
|
||||
warn!("failed to render sandbox assessment prompt: {err}");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let (system_prompt_section, user_prompt_section) = match rendered_prompt.split_once("\n---\n") {
|
||||
Some(split) => split,
|
||||
None => {
|
||||
warn!("rendered sandbox assessment prompt missing separator");
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let system_prompt = system_prompt_section
|
||||
.strip_prefix("System Prompt:\n")
|
||||
.unwrap_or(system_prompt_section)
|
||||
.trim()
|
||||
.to_string();
|
||||
let user_prompt = user_prompt_section
|
||||
.strip_prefix("User Prompt:\n")
|
||||
.unwrap_or(user_prompt_section)
|
||||
.trim()
|
||||
.to_string();
|
||||
|
||||
let prompt = Prompt {
|
||||
input: vec![ResponseItem::Message {
|
||||
id: None,
|
||||
role: "user".to_string(),
|
||||
content: vec![ContentItem::InputText { text: user_prompt }],
|
||||
}],
|
||||
tools: Vec::new(),
|
||||
parallel_tool_calls: false,
|
||||
base_instructions_override: Some(system_prompt),
|
||||
output_schema: Some(sandbox_assessment_schema()),
|
||||
};
|
||||
|
||||
let model_family = models_manager
|
||||
.construct_model_family(&config.model, &config)
|
||||
.await;
|
||||
|
||||
let child_otel = parent_otel.with_model(config.model.as_str(), model_family.slug.as_str());
|
||||
|
||||
let client = ModelClient::new(
|
||||
Arc::clone(&config),
|
||||
Some(auth_manager),
|
||||
model_family,
|
||||
child_otel,
|
||||
provider,
|
||||
Some(SANDBOX_ASSESSMENT_REASONING_EFFORT),
|
||||
config.model_reasoning_summary,
|
||||
conversation_id,
|
||||
session_source,
|
||||
);
|
||||
|
||||
let start = Instant::now();
|
||||
let assessment_result = timeout(SANDBOX_ASSESSMENT_TIMEOUT, async move {
|
||||
let mut stream = client.stream(&prompt).await?;
|
||||
let mut last_json: Option<String> = None;
|
||||
while let Some(event) = stream.next().await {
|
||||
match event {
|
||||
Ok(ResponseEvent::OutputItemDone(item)) => {
|
||||
if let Some(text) = response_item_text(&item) {
|
||||
last_json = Some(text);
|
||||
}
|
||||
}
|
||||
Ok(ResponseEvent::RateLimits(_)) => {}
|
||||
Ok(ResponseEvent::Completed { .. }) => break,
|
||||
Ok(_) => continue,
|
||||
Err(err) => return Err(err),
|
||||
}
|
||||
}
|
||||
Ok(last_json)
|
||||
})
|
||||
.await;
|
||||
let duration = start.elapsed();
|
||||
parent_otel.sandbox_assessment_latency(call_id, duration);
|
||||
|
||||
match assessment_result {
|
||||
Ok(Ok(Some(raw))) => match serde_json::from_str::<SandboxCommandAssessment>(raw.trim()) {
|
||||
Ok(assessment) => {
|
||||
parent_otel.sandbox_assessment(
|
||||
call_id,
|
||||
"success",
|
||||
Some(assessment.risk_level),
|
||||
duration,
|
||||
);
|
||||
return Some(assessment);
|
||||
}
|
||||
Err(err) => {
|
||||
warn!("failed to parse sandbox assessment JSON: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "parse_error", None, duration);
|
||||
}
|
||||
},
|
||||
Ok(Ok(None)) => {
|
||||
warn!("sandbox assessment response did not include any message");
|
||||
parent_otel.sandbox_assessment(call_id, "no_output", None, duration);
|
||||
}
|
||||
Ok(Err(err)) => {
|
||||
warn!("sandbox assessment failed: {err}");
|
||||
parent_otel.sandbox_assessment(call_id, "model_error", None, duration);
|
||||
}
|
||||
Err(_) => {
|
||||
warn!("sandbox assessment timed out");
|
||||
parent_otel.sandbox_assessment(call_id, "timeout", None, duration);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
fn summarize_sandbox_policy(policy: &SandboxPolicy) -> String {
|
||||
match policy {
|
||||
SandboxPolicy::DangerFullAccess => "danger-full-access".to_string(),
|
||||
SandboxPolicy::ReadOnly => "read-only".to_string(),
|
||||
SandboxPolicy::WorkspaceWrite { network_access, .. } => {
|
||||
let network = if *network_access {
|
||||
"network"
|
||||
} else {
|
||||
"no-network"
|
||||
};
|
||||
format!("workspace-write (network_access={network})")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn sandbox_roots_for_prompt(policy: &SandboxPolicy, cwd: &Path) -> Vec<PathBuf> {
|
||||
let mut roots = vec![cwd.to_path_buf()];
|
||||
if let SandboxPolicy::WorkspaceWrite { writable_roots, .. } = policy {
|
||||
roots.extend(writable_roots.iter().cloned());
|
||||
}
|
||||
roots
|
||||
}
|
||||
|
||||
fn sandbox_assessment_schema() -> serde_json::Value {
|
||||
json!({
|
||||
"type": "object",
|
||||
"required": ["description", "risk_level"],
|
||||
"properties": {
|
||||
"description": {
|
||||
"type": "string",
|
||||
"minLength": 1,
|
||||
"maxLength": 500
|
||||
},
|
||||
"risk_level": {
|
||||
"type": "string",
|
||||
"enum": ["low", "medium", "high"]
|
||||
},
|
||||
},
|
||||
"additionalProperties": false
|
||||
})
|
||||
}
|
||||
|
||||
fn response_item_text(item: &ResponseItem) -> Option<String> {
|
||||
match item {
|
||||
ResponseItem::Message { content, .. } => {
|
||||
let mut buffers: Vec<&str> = Vec::new();
|
||||
for segment in content {
|
||||
match segment {
|
||||
ContentItem::InputText { text } | ContentItem::OutputText { text } => {
|
||||
if !text.is_empty() {
|
||||
buffers.push(text);
|
||||
}
|
||||
}
|
||||
ContentItem::InputImage { .. } => {}
|
||||
}
|
||||
}
|
||||
if buffers.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(buffers.join("\n"))
|
||||
}
|
||||
}
|
||||
ResponseItem::FunctionCallOutput { output, .. } => Some(output.content.clone()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
|
@ -6,8 +6,6 @@ sandbox placement and transformation of portable CommandSpec into a
|
|||
ready‑to‑spawn environment.
|
||||
*/
|
||||
|
||||
pub mod assessment;
|
||||
|
||||
use crate::exec::ExecExpiration;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::exec::SandboxType;
|
||||
|
|
|
|||
|
|
@ -7,12 +7,10 @@ retry without sandbox on denial (no re‑approval thanks to caching).
|
|||
*/
|
||||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::error::get_error_message_ui;
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ExecApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxOverride;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
|
@ -43,7 +41,6 @@ impl ToolOrchestrator {
|
|||
) -> Result<Out, ToolError>
|
||||
where
|
||||
T: ToolRuntime<Rq, Out>,
|
||||
Rq: ProvidesSandboxRetryData,
|
||||
{
|
||||
let otel = turn_ctx.client.get_otel_event_manager();
|
||||
let otel_tn = &tool_ctx.tool_name;
|
||||
|
|
@ -65,26 +62,11 @@ impl ToolOrchestrator {
|
|||
return Err(ToolError::Rejected(reason));
|
||||
}
|
||||
ExecApprovalRequirement::NeedsApproval { reason, .. } => {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
None,
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: reason,
|
||||
risk,
|
||||
};
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
||||
|
|
@ -141,33 +123,12 @@ impl ToolOrchestrator {
|
|||
|
||||
// Ask for approval before retrying without sandbox.
|
||||
if !tool.should_bypass_approval(approval_policy, already_approved) {
|
||||
let mut risk = None;
|
||||
|
||||
if let Some(metadata) = req.sandbox_retry_data() {
|
||||
let err = SandboxErr::Denied {
|
||||
output: output.clone(),
|
||||
};
|
||||
let friendly = get_error_message_ui(&CodexErr::Sandbox(err));
|
||||
let failure_summary = format!("failed in sandbox: {friendly}");
|
||||
|
||||
risk = tool_ctx
|
||||
.session
|
||||
.assess_sandbox_command(
|
||||
turn_ctx,
|
||||
&tool_ctx.call_id,
|
||||
&metadata.command,
|
||||
Some(failure_summary.as_str()),
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
let reason_msg = build_denial_reason_from_output(output.as_ref());
|
||||
let approval_ctx = ApprovalCtx {
|
||||
session: tool_ctx.session,
|
||||
turn: turn_ctx,
|
||||
call_id: &tool_ctx.call_id,
|
||||
retry_reason: Some(reason_msg),
|
||||
risk,
|
||||
};
|
||||
|
||||
let decision = tool.start_approval_async(req, approval_ctx).await;
|
||||
|
|
|
|||
|
|
@ -11,9 +11,7 @@ use crate::sandboxing::SandboxPermissions;
|
|||
use crate::sandboxing::execute_env;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
|
@ -35,12 +33,6 @@ pub struct ApplyPatchRequest {
|
|||
pub codex_exe: Option<PathBuf>,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ApplyPatchRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ApplyPatchRuntime;
|
||||
|
||||
|
|
@ -115,7 +107,6 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
|||
let call_id = ctx.call_id.to_string();
|
||||
let cwd = req.cwd.clone();
|
||||
let retry_reason = ctx.retry_reason.clone();
|
||||
let risk = ctx.risk.clone();
|
||||
let user_explicitly_approved = req.user_explicitly_approved;
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, move || async move {
|
||||
|
|
@ -127,7 +118,6 @@ impl Approvable<ApplyPatchRequest> for ApplyPatchRuntime {
|
|||
vec!["apply_patch".to_string()],
|
||||
cwd,
|
||||
Some(reason),
|
||||
risk,
|
||||
None,
|
||||
)
|
||||
.await
|
||||
|
|
|
|||
|
|
@ -11,10 +11,8 @@ use crate::tools::runtimes::build_command_spec;
|
|||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ExecApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxOverride;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
|
@ -36,15 +34,6 @@ pub struct ShellRequest {
|
|||
pub exec_approval_requirement: ExecApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for ShellRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
Some(SandboxRetryData {
|
||||
command: self.command.clone(),
|
||||
cwd: self.cwd.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ShellRuntime;
|
||||
|
||||
|
|
@ -101,7 +90,6 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
|||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let risk = ctx.risk.clone();
|
||||
let session = ctx.session;
|
||||
let turn = ctx.turn;
|
||||
let call_id = ctx.call_id.to_string();
|
||||
|
|
@ -114,7 +102,6 @@ impl Approvable<ShellRequest> for ShellRuntime {
|
|||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
req.exec_approval_requirement
|
||||
.proposed_execpolicy_amendment()
|
||||
.cloned(),
|
||||
|
|
|
|||
|
|
@ -12,10 +12,8 @@ use crate::tools::runtimes::build_command_spec;
|
|||
use crate::tools::sandboxing::Approvable;
|
||||
use crate::tools::sandboxing::ApprovalCtx;
|
||||
use crate::tools::sandboxing::ExecApprovalRequirement;
|
||||
use crate::tools::sandboxing::ProvidesSandboxRetryData;
|
||||
use crate::tools::sandboxing::SandboxAttempt;
|
||||
use crate::tools::sandboxing::SandboxOverride;
|
||||
use crate::tools::sandboxing::SandboxRetryData;
|
||||
use crate::tools::sandboxing::Sandboxable;
|
||||
use crate::tools::sandboxing::SandboxablePreference;
|
||||
use crate::tools::sandboxing::ToolCtx;
|
||||
|
|
@ -40,15 +38,6 @@ pub struct UnifiedExecRequest {
|
|||
pub exec_approval_requirement: ExecApprovalRequirement,
|
||||
}
|
||||
|
||||
impl ProvidesSandboxRetryData for UnifiedExecRequest {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData> {
|
||||
Some(SandboxRetryData {
|
||||
command: self.command.clone(),
|
||||
cwd: self.cwd.clone(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(serde::Serialize, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub struct UnifiedExecApprovalKey {
|
||||
pub command: Vec<String>,
|
||||
|
|
@ -122,7 +111,6 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
|||
.retry_reason
|
||||
.clone()
|
||||
.or_else(|| req.justification.clone());
|
||||
let risk = ctx.risk.clone();
|
||||
Box::pin(async move {
|
||||
with_cached_approval(&session.services, key, || async move {
|
||||
session
|
||||
|
|
@ -132,7 +120,6 @@ impl Approvable<UnifiedExecRequest> for UnifiedExecRuntime<'_> {
|
|||
command,
|
||||
cwd,
|
||||
reason,
|
||||
risk,
|
||||
req.exec_approval_requirement
|
||||
.proposed_execpolicy_amendment()
|
||||
.cloned(),
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
use crate::codex::Session;
|
||||
use crate::codex::TurnContext;
|
||||
use crate::error::CodexErr;
|
||||
use crate::protocol::SandboxCommandAssessment;
|
||||
use crate::protocol::SandboxPolicy;
|
||||
use crate::sandboxing::CommandSpec;
|
||||
use crate::sandboxing::SandboxManager;
|
||||
|
|
@ -20,7 +19,6 @@ use std::collections::HashMap;
|
|||
use std::fmt::Debug;
|
||||
use std::hash::Hash;
|
||||
use std::path::Path;
|
||||
use std::path::PathBuf;
|
||||
|
||||
use futures::Future;
|
||||
use futures::future::BoxFuture;
|
||||
|
|
@ -84,7 +82,6 @@ pub(crate) struct ApprovalCtx<'a> {
|
|||
pub turn: &'a TurnContext,
|
||||
pub call_id: &'a str,
|
||||
pub retry_reason: Option<String>,
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
// Specifies what tool orchestrator should do with a given tool call.
|
||||
|
|
@ -219,17 +216,6 @@ pub(crate) struct ToolCtx<'a> {
|
|||
pub tool_name: String,
|
||||
}
|
||||
|
||||
/// Captures the command metadata needed to re-run a tool request without sandboxing.
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub(crate) struct SandboxRetryData {
|
||||
pub command: Vec<String>,
|
||||
pub cwd: PathBuf,
|
||||
}
|
||||
|
||||
pub(crate) trait ProvidesSandboxRetryData {
|
||||
fn sandbox_retry_data(&self) -> Option<SandboxRetryData>;
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum ToolError {
|
||||
Rejected(String),
|
||||
|
|
|
|||
|
|
@ -1,24 +0,0 @@
|
|||
You are a security analyst evaluating shell commands that were blocked by a sandbox. Given the provided metadata, summarize the command's likely intent and assess the risk to help the user decide whether to approve command execution. Return strictly valid JSON with the keys:
|
||||
- description (concise summary of command intent and potential effects, no more than one sentence, use present tense)
|
||||
- risk_level ("low", "medium", or "high")
|
||||
Risk level examples:
|
||||
- low: read-only inspections, listing files, printing configuration, fetching artifacts from trusted sources
|
||||
- medium: modifying project files, installing dependencies
|
||||
- high: deleting or overwriting data, exfiltrating secrets, escalating privileges, or disabling security controls
|
||||
If information is insufficient, choose the most cautious risk level supported by the evidence.
|
||||
Respond with JSON only, without markdown code fences or extra commentary.
|
||||
|
||||
---
|
||||
|
||||
Command metadata:
|
||||
Platform: {{ platform }}
|
||||
Sandbox policy: {{ sandbox_policy }}
|
||||
{% if let Some(roots) = filesystem_roots %}
|
||||
Filesystem roots: {{ roots }}
|
||||
{% endif %}
|
||||
Working directory: {{ working_directory }}
|
||||
Command argv: {{ command_argv }}
|
||||
Command (joined): {{ command_joined }}
|
||||
{% if let Some(message) = sandbox_failure_message %}
|
||||
Sandbox failure message: {{ message }}
|
||||
{% endif %}
|
||||
|
|
@ -200,7 +200,6 @@ pub async fn run_main(cli: Cli, codex_linux_sandbox_exe: Option<PathBuf>) -> any
|
|||
include_apply_patch_tool: None,
|
||||
show_raw_agent_reasoning: oss.then_some(true),
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: add_dir,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -169,7 +169,6 @@ impl CodexToolCallParam {
|
|||
include_apply_patch_tool: None,
|
||||
show_raw_agent_reasoning: None,
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: Vec::new(),
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -179,7 +179,6 @@ async fn run_codex_tool_session_inner(
|
|||
cwd,
|
||||
call_id,
|
||||
reason: _,
|
||||
risk,
|
||||
proposed_execpolicy_amendment: _,
|
||||
parsed_cmd,
|
||||
}) => {
|
||||
|
|
@ -193,7 +192,6 @@ async fn run_codex_tool_session_inner(
|
|||
event.id.clone(),
|
||||
call_id,
|
||||
parsed_cmd,
|
||||
risk,
|
||||
)
|
||||
.await;
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ use std::sync::Arc;
|
|||
use codex_core::CodexConversation;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::SandboxCommandAssessment;
|
||||
use codex_protocol::parse_command::ParsedCommand;
|
||||
use mcp_types::ElicitRequest;
|
||||
use mcp_types::ElicitRequestParamsRequestedSchema;
|
||||
|
|
@ -38,8 +37,6 @@ pub struct ExecApprovalElicitRequestParams {
|
|||
pub codex_command: Vec<String>,
|
||||
pub codex_cwd: PathBuf,
|
||||
pub codex_parsed_cmd: Vec<ParsedCommand>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub codex_risk: Option<SandboxCommandAssessment>,
|
||||
}
|
||||
|
||||
// TODO(mbolin): ExecApprovalResponse does not conform to ElicitResult. See:
|
||||
|
|
@ -62,7 +59,6 @@ pub(crate) async fn handle_exec_approval_request(
|
|||
event_id: String,
|
||||
call_id: String,
|
||||
codex_parsed_cmd: Vec<ParsedCommand>,
|
||||
codex_risk: Option<SandboxCommandAssessment>,
|
||||
) {
|
||||
let escaped_command =
|
||||
shlex::try_join(command.iter().map(String::as_str)).unwrap_or_else(|_| command.join(" "));
|
||||
|
|
@ -85,7 +81,6 @@ pub(crate) async fn handle_exec_approval_request(
|
|||
codex_command: command,
|
||||
codex_cwd: cwd,
|
||||
codex_parsed_cmd,
|
||||
codex_risk,
|
||||
};
|
||||
let params_json = match serde_json::to_value(¶ms) {
|
||||
Ok(value) => value,
|
||||
|
|
|
|||
|
|
@ -200,7 +200,6 @@ fn create_expected_elicitation_request(
|
|||
codex_cwd: workdir.to_path_buf(),
|
||||
codex_call_id: "call1234".to_string(),
|
||||
codex_parsed_cmd,
|
||||
codex_risk: None,
|
||||
})?),
|
||||
})
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,7 +8,6 @@ use codex_protocol::openai_models::ReasoningEffort;
|
|||
use codex_protocol::protocol::AskForApproval;
|
||||
use codex_protocol::protocol::ReviewDecision;
|
||||
use codex_protocol::protocol::SandboxPolicy;
|
||||
use codex_protocol::protocol::SandboxRiskLevel;
|
||||
use codex_protocol::user_input::UserInput;
|
||||
use eventsource_stream::Event as StreamEvent;
|
||||
use eventsource_stream::EventStreamError as StreamError;
|
||||
|
|
@ -374,52 +373,6 @@ impl OtelEventManager {
|
|||
);
|
||||
}
|
||||
|
||||
pub fn sandbox_assessment(
|
||||
&self,
|
||||
call_id: &str,
|
||||
status: &str,
|
||||
risk_level: Option<SandboxRiskLevel>,
|
||||
duration: Duration,
|
||||
) {
|
||||
let level = risk_level.map(|level| level.as_str());
|
||||
|
||||
tracing::event!(
|
||||
tracing::Level::INFO,
|
||||
event.name = "codex.sandbox_assessment",
|
||||
event.timestamp = %timestamp(),
|
||||
conversation.id = %self.metadata.conversation_id,
|
||||
app.version = %self.metadata.app_version,
|
||||
auth_mode = self.metadata.auth_mode,
|
||||
user.account_id = self.metadata.account_id,
|
||||
user.email = self.metadata.account_email,
|
||||
terminal.type = %self.metadata.terminal_type,
|
||||
model = %self.metadata.model,
|
||||
slug = %self.metadata.slug,
|
||||
call_id = %call_id,
|
||||
status = %status,
|
||||
risk_level = level,
|
||||
duration_ms = %duration.as_millis(),
|
||||
);
|
||||
}
|
||||
|
||||
pub fn sandbox_assessment_latency(&self, call_id: &str, duration: Duration) {
|
||||
tracing::event!(
|
||||
tracing::Level::INFO,
|
||||
event.name = "codex.sandbox_assessment_latency",
|
||||
event.timestamp = %timestamp(),
|
||||
conversation.id = %self.metadata.conversation_id,
|
||||
app.version = %self.metadata.app_version,
|
||||
auth_mode = self.metadata.auth_mode,
|
||||
user.account_id = self.metadata.account_id,
|
||||
user.email = self.metadata.account_email,
|
||||
terminal.type = %self.metadata.terminal_type,
|
||||
model = %self.metadata.model,
|
||||
slug = %self.metadata.slug,
|
||||
call_id = %call_id,
|
||||
duration_ms = %duration.as_millis(),
|
||||
);
|
||||
}
|
||||
|
||||
pub async fn log_tool_result<F, Fut, E>(
|
||||
&self,
|
||||
tool_name: &str,
|
||||
|
|
|
|||
|
|
@ -9,14 +9,6 @@ use serde::Deserialize;
|
|||
use serde::Serialize;
|
||||
use ts_rs::TS;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Deserialize, Serialize, PartialEq, Eq, Hash, JsonSchema, TS)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum SandboxRiskLevel {
|
||||
Low,
|
||||
Medium,
|
||||
High,
|
||||
}
|
||||
|
||||
/// Proposed execpolicy change to allow commands starting with this prefix.
|
||||
///
|
||||
/// The `command` tokens form the prefix that would be added as an execpolicy
|
||||
|
|
@ -45,22 +37,6 @@ impl From<Vec<String>> for ExecPolicyAmendment {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, PartialEq, Eq, JsonSchema, TS)]
|
||||
pub struct SandboxCommandAssessment {
|
||||
pub description: String,
|
||||
pub risk_level: SandboxRiskLevel,
|
||||
}
|
||||
|
||||
impl SandboxRiskLevel {
|
||||
pub fn as_str(&self) -> &'static str {
|
||||
match self {
|
||||
Self::Low => "low",
|
||||
Self::Medium => "medium",
|
||||
Self::High => "high",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema, TS)]
|
||||
pub struct ExecApprovalRequestEvent {
|
||||
/// Identifier for the associated exec call, if available.
|
||||
|
|
@ -76,9 +52,6 @@ pub struct ExecApprovalRequestEvent {
|
|||
/// Optional human-readable reason for the approval (e.g. retry without sandbox).
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub reason: Option<String>,
|
||||
/// Optional model-provided risk assessment describing the blocked command.
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub risk: Option<SandboxCommandAssessment>,
|
||||
/// Proposed execpolicy amendment that can be applied to allow future runs.
|
||||
#[serde(default, skip_serializing_if = "Option::is_none")]
|
||||
#[ts(optional)]
|
||||
|
|
|
|||
|
|
@ -40,8 +40,6 @@ pub use crate::approvals::ApplyPatchApprovalRequestEvent;
|
|||
pub use crate::approvals::ElicitationAction;
|
||||
pub use crate::approvals::ExecApprovalRequestEvent;
|
||||
pub use crate::approvals::ExecPolicyAmendment;
|
||||
pub use crate::approvals::SandboxCommandAssessment;
|
||||
pub use crate::approvals::SandboxRiskLevel;
|
||||
|
||||
/// Open/close tags for special user-input blocks. Used across crates to avoid
|
||||
/// duplicated hardcoded strings.
|
||||
|
|
|
|||
|
|
@ -23,8 +23,6 @@ use codex_core::protocol::ExecPolicyAmendment;
|
|||
use codex_core::protocol::FileChange;
|
||||
use codex_core::protocol::Op;
|
||||
use codex_core::protocol::ReviewDecision;
|
||||
use codex_core::protocol::SandboxCommandAssessment;
|
||||
use codex_core::protocol::SandboxRiskLevel;
|
||||
use crossterm::event::KeyCode;
|
||||
use crossterm::event::KeyEvent;
|
||||
use crossterm::event::KeyEventKind;
|
||||
|
|
@ -45,7 +43,6 @@ pub(crate) enum ApprovalRequest {
|
|||
id: String,
|
||||
command: Vec<String>,
|
||||
reason: Option<String>,
|
||||
risk: Option<SandboxCommandAssessment>,
|
||||
proposed_execpolicy_amendment: Option<ExecPolicyAmendment>,
|
||||
},
|
||||
ApplyPatch {
|
||||
|
|
@ -345,18 +342,11 @@ impl From<ApprovalRequest> for ApprovalRequestState {
|
|||
id,
|
||||
command,
|
||||
reason,
|
||||
risk,
|
||||
proposed_execpolicy_amendment,
|
||||
} => {
|
||||
let reason = reason.filter(|item| !item.is_empty());
|
||||
let has_reason = reason.is_some();
|
||||
let mut header: Vec<Line<'static>> = Vec::new();
|
||||
if let Some(reason) = reason {
|
||||
header.push(Line::from(vec!["Reason: ".into(), reason.italic()]));
|
||||
}
|
||||
if let Some(risk) = risk.as_ref() {
|
||||
header.extend(render_risk_lines(risk));
|
||||
} else if has_reason {
|
||||
header.push(Line::from(""));
|
||||
}
|
||||
let full_cmd = strip_bash_lc_and_escape(&command);
|
||||
|
|
@ -419,28 +409,6 @@ impl From<ApprovalRequest> for ApprovalRequestState {
|
|||
}
|
||||
}
|
||||
|
||||
fn render_risk_lines(risk: &SandboxCommandAssessment) -> Vec<Line<'static>> {
|
||||
let level_span = match risk.risk_level {
|
||||
SandboxRiskLevel::Low => "LOW".green().bold(),
|
||||
SandboxRiskLevel::Medium => "MEDIUM".cyan().bold(),
|
||||
SandboxRiskLevel::High => "HIGH".red().bold(),
|
||||
};
|
||||
|
||||
let mut lines = Vec::new();
|
||||
|
||||
let description = risk.description.trim();
|
||||
if !description.is_empty() {
|
||||
lines.push(Line::from(vec![
|
||||
"Summary: ".into(),
|
||||
description.to_string().into(),
|
||||
]));
|
||||
}
|
||||
|
||||
lines.push(vec!["Risk: ".into(), level_span].into());
|
||||
lines.push(Line::from(""));
|
||||
lines
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
enum ApprovalVariant {
|
||||
Exec {
|
||||
|
|
@ -570,7 +538,6 @@ mod tests {
|
|||
id: "test".to_string(),
|
||||
command: vec!["echo".to_string(), "hi".to_string()],
|
||||
reason: Some("reason".to_string()),
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
}
|
||||
}
|
||||
|
|
@ -613,7 +580,6 @@ mod tests {
|
|||
id: "test".to_string(),
|
||||
command: vec!["echo".to_string()],
|
||||
reason: None,
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
|
||||
"echo".to_string(),
|
||||
])),
|
||||
|
|
@ -652,7 +618,6 @@ mod tests {
|
|||
id: "test".to_string(),
|
||||
command: vec!["echo".to_string()],
|
||||
reason: None,
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
|
||||
"echo".to_string(),
|
||||
])),
|
||||
|
|
@ -679,7 +644,6 @@ mod tests {
|
|||
id: "test".into(),
|
||||
command,
|
||||
reason: None,
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -570,7 +570,6 @@ mod tests {
|
|||
id: "1".to_string(),
|
||||
command: vec!["echo".into(), "ok".into()],
|
||||
reason: None,
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1095,7 +1095,6 @@ impl ChatWidget {
|
|||
id,
|
||||
command: ev.command,
|
||||
reason: ev.reason,
|
||||
risk: ev.risk,
|
||||
proposed_execpolicy_amendment: ev.proposed_execpolicy_amendment,
|
||||
};
|
||||
self.bottom_pane
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
source: tui/src/chatwidget/tests.rs
|
||||
expression: terminal.backend().vt100().screen().contents()
|
||||
---
|
||||
|
||||
|
||||
Would you like to run the following command?
|
||||
|
||||
Reason: this is a test reason such as one that would be produced by the model
|
||||
|
|
|
|||
|
|
@ -3,4 +3,3 @@ source: tui/src/chatwidget/tests.rs
|
|||
expression: lines_to_single_string(&decision)
|
||||
---
|
||||
✔ You approved codex to run echo hello world this time
|
||||
|
||||
|
|
|
|||
|
|
@ -755,7 +755,6 @@ fn exec_approval_emits_proposed_command_and_decision_history() {
|
|||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
|
|
@ -800,7 +799,6 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
|
|||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
|
|
@ -851,7 +849,6 @@ fn exec_approval_decision_truncates_multiline_and_long_commands() {
|
|||
command: vec!["bash".into(), "-lc".into(), long],
|
||||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
|
||||
reason: None,
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: None,
|
||||
parsed_cmd: vec![],
|
||||
};
|
||||
|
|
@ -2105,7 +2102,6 @@ fn approval_modal_exec_snapshot() {
|
|||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
|
||||
"echo".into(),
|
||||
"hello".into(),
|
||||
|
|
@ -2157,7 +2153,6 @@ fn approval_modal_exec_without_reason_snapshot() {
|
|||
command: vec!["bash".into(), "-lc".into(), "echo hello world".into()],
|
||||
cwd: std::env::current_dir().unwrap_or_else(|_| PathBuf::from(".")),
|
||||
reason: None,
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
|
||||
"echo".into(),
|
||||
"hello".into(),
|
||||
|
|
@ -2376,7 +2371,6 @@ fn status_widget_and_approval_modal_snapshot() {
|
|||
reason: Some(
|
||||
"this is a test reason such as one that would be produced by the model".into(),
|
||||
),
|
||||
risk: None,
|
||||
proposed_execpolicy_amendment: Some(ExecPolicyAmendment::new(vec![
|
||||
"echo".into(),
|
||||
"hello world".into(),
|
||||
|
|
|
|||
|
|
@ -218,7 +218,6 @@ pub async fn run_main(
|
|||
include_apply_patch_tool: None,
|
||||
show_raw_agent_reasoning: cli.oss.then_some(true),
|
||||
tools_web_search_request: None,
|
||||
experimental_sandbox_command_assessment: None,
|
||||
additional_writable_roots: additional_dirs,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -39,17 +39,16 @@ web_search_request = true # allow the model to request web searches
|
|||
|
||||
Supported features:
|
||||
|
||||
| Key | Default | Stage | Description |
|
||||
| ----------------------------------------- | :-----: | ------------ | ----------------------------------------------------- |
|
||||
| `unified_exec` | false | Experimental | Use the unified PTY-backed exec tool |
|
||||
| `rmcp_client` | false | Experimental | Enable oauth support for streamable HTTP MCP servers |
|
||||
| `apply_patch_freeform` | false | Beta | Include the freeform `apply_patch` tool |
|
||||
| `view_image_tool` | true | Stable | Include the `view_image` tool |
|
||||
| `web_search_request` | false | Stable | Allow the model to issue web searches |
|
||||
| `experimental_sandbox_command_assessment` | false | Experimental | Enable model-based sandbox risk assessment |
|
||||
| `ghost_commit` | false | Experimental | Create a ghost commit each turn |
|
||||
| `enable_experimental_windows_sandbox` | false | Experimental | Use the Windows restricted-token sandbox |
|
||||
| `tui2` | false | Experimental | Use the experimental TUI v2 (viewport) implementation |
|
||||
| Key | Default | Stage | Description |
|
||||
| ------------------------------------- | :-----: | ------------ | ----------------------------------------------------- |
|
||||
| `unified_exec` | false | Experimental | Use the unified PTY-backed exec tool |
|
||||
| `rmcp_client` | false | Experimental | Enable oauth support for streamable HTTP MCP servers |
|
||||
| `apply_patch_freeform` | false | Beta | Include the freeform `apply_patch` tool |
|
||||
| `view_image_tool` | true | Stable | Include the `view_image` tool |
|
||||
| `web_search_request` | false | Stable | Allow the model to issue web searches |
|
||||
| `ghost_commit` | false | Experimental | Create a ghost commit each turn |
|
||||
| `enable_experimental_windows_sandbox` | false | Experimental | Use the Windows restricted-token sandbox |
|
||||
| `tui2` | false | Experimental | Use the experimental TUI v2 (viewport) implementation |
|
||||
|
||||
Notes:
|
||||
|
||||
|
|
|
|||
|
|
@ -218,7 +218,6 @@ rmcp_client = false
|
|||
apply_patch_freeform = false
|
||||
view_image_tool = true
|
||||
web_search_request = false
|
||||
experimental_sandbox_command_assessment = false
|
||||
ghost_commit = false
|
||||
enable_experimental_windows_sandbox = false
|
||||
|
||||
|
|
@ -314,7 +313,6 @@ experimental_use_freeform_apply_patch = false
|
|||
# experimental_compact_prompt_file = "compact_prompt.txt"
|
||||
# include_apply_patch_tool = false
|
||||
# experimental_use_freeform_apply_patch = false
|
||||
# experimental_sandbox_command_assessment = false
|
||||
# tools_web_search = false
|
||||
# tools_view_image = true
|
||||
# features = { unified_exec = false }
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue