feat(windows) start powershell in utf-8 mode (#7902)
## Summary Adds a FeatureFlag to enforce UTF8 encoding in powershell, particularly Windows Powershell v5. This should help address issues like #7290. Notably, this PR does not include the ability to parse `apply_patch` invocations within UTF8 shell commands (calls to the freeform tool should not be impacted). I am leaving this out of scope for now. We should address before this feature becomes Stable, but those cases are not the default behavior at this time so we're okay for experimentation phase. We should continue cleaning up the `apply_patch::invocation` logic and then can handle it more cleanly. ## Testing - [x] Adds additional testing
This commit is contained in:
parent
b24b7884c7
commit
33e1d0844a
6 changed files with 251 additions and 3 deletions
|
|
@ -91,6 +91,8 @@ pub enum Feature {
|
|||
Tui2,
|
||||
/// Enable discovery and injection of skills.
|
||||
Skills,
|
||||
/// Enforce UTF8 output in Powershell.
|
||||
PowershellUtf8,
|
||||
}
|
||||
|
||||
impl Feature {
|
||||
|
|
@ -386,6 +388,12 @@ pub const FEATURES: &[FeatureSpec] = &[
|
|||
stage: Stage::Experimental,
|
||||
default_enabled: true,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::PowershellUtf8,
|
||||
key: "powershell_utf8",
|
||||
stage: Stage::Experimental,
|
||||
default_enabled: false,
|
||||
},
|
||||
FeatureSpec {
|
||||
id: Feature::Tui2,
|
||||
key: "tui2",
|
||||
|
|
|
|||
|
|
@ -8,6 +8,30 @@ use crate::shell::detect_shell_type;
|
|||
|
||||
const POWERSHELL_FLAGS: &[&str] = &["-nologo", "-noprofile", "-command", "-c"];
|
||||
|
||||
/// Prefixed command for powershell shell calls to force UTF-8 console output.
|
||||
pub(crate) const UTF8_OUTPUT_PREFIX: &str =
|
||||
"[Console]::OutputEncoding=[System.Text.Encoding]::UTF8;\n";
|
||||
|
||||
pub(crate) fn prefix_powershell_script_with_utf8(command: &[String]) -> Vec<String> {
|
||||
let Some((_, script)) = extract_powershell_command(command) else {
|
||||
return command.to_vec();
|
||||
};
|
||||
|
||||
let trimmed = script.trim_start();
|
||||
let script = if trimmed.starts_with(UTF8_OUTPUT_PREFIX) {
|
||||
script.to_string()
|
||||
} else {
|
||||
format!("{UTF8_OUTPUT_PREFIX}{script}")
|
||||
};
|
||||
|
||||
let mut command: Vec<String> = command[..(command.len() - 1)]
|
||||
.iter()
|
||||
.map(std::string::ToString::to_string)
|
||||
.collect();
|
||||
command.push(script);
|
||||
command
|
||||
}
|
||||
|
||||
/// Extract the PowerShell script body from an invocation such as:
|
||||
///
|
||||
/// - ["pwsh", "-NoProfile", "-Command", "Get-ChildItem -Recurse | Select-String foo"]
|
||||
|
|
@ -22,7 +46,10 @@ pub fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
|
|||
}
|
||||
|
||||
let shell = &command[0];
|
||||
if detect_shell_type(&PathBuf::from(shell)) != Some(ShellType::PowerShell) {
|
||||
if !matches!(
|
||||
detect_shell_type(&PathBuf::from(shell)),
|
||||
Some(ShellType::PowerShell)
|
||||
) {
|
||||
return None;
|
||||
}
|
||||
|
||||
|
|
@ -36,7 +63,7 @@ pub fn extract_powershell_command(command: &[String]) -> Option<(&str, &str)> {
|
|||
}
|
||||
if flag.eq_ignore_ascii_case("-Command") || flag.eq_ignore_ascii_case("-c") {
|
||||
let script = &command[i + 1];
|
||||
return Some((shell, script.as_str()));
|
||||
return Some((shell, script));
|
||||
}
|
||||
i += 1;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,8 +5,11 @@ Executes shell requests under the orchestrator: asks for approval when needed,
|
|||
builds a CommandSpec, and runs it under the current SandboxAttempt.
|
||||
*/
|
||||
use crate::exec::ExecToolCallOutput;
|
||||
use crate::features::Feature;
|
||||
use crate::powershell::prefix_powershell_script_with_utf8;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::sandboxing::execute_env;
|
||||
use crate::shell::ShellType;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
|
|
@ -144,6 +147,13 @@ impl ToolRuntime<ShellRequest, ExecToolCallOutput> for ShellRuntime {
|
|||
let base_command = &req.command;
|
||||
let session_shell = ctx.session.user_shell();
|
||||
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
|
||||
let command = if matches!(session_shell.shell_type, ShellType::PowerShell)
|
||||
&& ctx.session.features().enabled(Feature::PowershellUtf8)
|
||||
{
|
||||
prefix_powershell_script_with_utf8(&command)
|
||||
} else {
|
||||
command
|
||||
};
|
||||
|
||||
let spec = build_command_spec(
|
||||
&command,
|
||||
|
|
|
|||
|
|
@ -7,7 +7,10 @@ the session manager to spawn PTYs once an ExecEnv is prepared.
|
|||
use crate::error::CodexErr;
|
||||
use crate::error::SandboxErr;
|
||||
use crate::exec::ExecExpiration;
|
||||
use crate::features::Feature;
|
||||
use crate::powershell::prefix_powershell_script_with_utf8;
|
||||
use crate::sandboxing::SandboxPermissions;
|
||||
use crate::shell::ShellType;
|
||||
use crate::tools::runtimes::build_command_spec;
|
||||
use crate::tools::runtimes::maybe_wrap_shell_lc_with_snapshot;
|
||||
use crate::tools::sandboxing::Approvable;
|
||||
|
|
@ -165,6 +168,13 @@ impl<'a> ToolRuntime<UnifiedExecRequest, UnifiedExecSession> for UnifiedExecRunt
|
|||
let base_command = &req.command;
|
||||
let session_shell = ctx.session.user_shell();
|
||||
let command = maybe_wrap_shell_lc_with_snapshot(base_command, session_shell.as_ref());
|
||||
let command = if matches!(session_shell.shell_type, ShellType::PowerShell)
|
||||
&& ctx.session.features().enabled(Feature::PowershellUtf8)
|
||||
{
|
||||
prefix_powershell_script_with_utf8(&command)
|
||||
} else {
|
||||
command
|
||||
};
|
||||
|
||||
let spec = build_command_spec(
|
||||
&command,
|
||||
|
|
|
|||
|
|
@ -2,10 +2,13 @@
|
|||
|
||||
use anyhow::Result;
|
||||
use core_test_support::responses::ev_apply_patch_call;
|
||||
use core_test_support::responses::ev_apply_patch_custom_tool_call;
|
||||
use core_test_support::responses::ev_shell_command_call;
|
||||
use core_test_support::test_codex::ApplyPatchModelOutput;
|
||||
use pretty_assertions::assert_eq;
|
||||
use std::fs;
|
||||
use std::sync::atomic::AtomicI32;
|
||||
use std::sync::atomic::Ordering;
|
||||
|
||||
use codex_core::features::Feature;
|
||||
use codex_core::protocol::AskForApproval;
|
||||
|
|
@ -29,6 +32,11 @@ use core_test_support::test_codex::test_codex;
|
|||
use core_test_support::wait_for_event;
|
||||
use serde_json::json;
|
||||
use test_case::test_case;
|
||||
use wiremock::Mock;
|
||||
use wiremock::Respond;
|
||||
use wiremock::ResponseTemplate;
|
||||
use wiremock::matchers::method;
|
||||
use wiremock::matchers::path_regex;
|
||||
|
||||
pub async fn apply_patch_harness() -> Result<TestCodexHarness> {
|
||||
apply_patch_harness_with(|builder| builder).await
|
||||
|
|
@ -718,6 +726,132 @@ async fn apply_patch_shell_command_heredoc_with_cd_updates_relative_workdir() ->
|
|||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_cli_can_use_shell_command_output_as_patch_input() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = apply_patch_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let source_contents = "line1\nnaïve café\nline3\n";
|
||||
let source_path = harness.path("source.txt");
|
||||
fs::write(&source_path, source_contents)?;
|
||||
|
||||
let read_call_id = "read-source";
|
||||
let apply_call_id = "apply-from-read";
|
||||
|
||||
fn stdout_from_shell_output(output: &str) -> String {
|
||||
let normalized = output.replace("\r\n", "\n").replace('\r', "\n");
|
||||
normalized
|
||||
.split_once("Output:\n")
|
||||
.map(|x| x.1)
|
||||
.unwrap_or("")
|
||||
.trim_end_matches('\n')
|
||||
.to_string()
|
||||
}
|
||||
|
||||
fn function_call_output_text(body: &serde_json::Value, call_id: &str) -> String {
|
||||
body.get("input")
|
||||
.and_then(serde_json::Value::as_array)
|
||||
.and_then(|items| {
|
||||
items.iter().find(|item| {
|
||||
item.get("type").and_then(serde_json::Value::as_str)
|
||||
== Some("function_call_output")
|
||||
&& item.get("call_id").and_then(serde_json::Value::as_str) == Some(call_id)
|
||||
})
|
||||
})
|
||||
.and_then(|item| item.get("output").and_then(serde_json::Value::as_str))
|
||||
.expect("function_call_output output string")
|
||||
.to_string()
|
||||
}
|
||||
|
||||
struct DynamicApplyFromRead {
|
||||
num_calls: AtomicI32,
|
||||
read_call_id: String,
|
||||
apply_call_id: String,
|
||||
}
|
||||
|
||||
impl Respond for DynamicApplyFromRead {
|
||||
fn respond(&self, request: &wiremock::Request) -> ResponseTemplate {
|
||||
let call_num = self.num_calls.fetch_add(1, Ordering::SeqCst);
|
||||
match call_num {
|
||||
0 => {
|
||||
let command = if cfg!(windows) {
|
||||
"Get-Content -Encoding utf8 source.txt"
|
||||
} else {
|
||||
"cat source.txt"
|
||||
};
|
||||
let body = sse(vec![
|
||||
ev_response_created("resp-1"),
|
||||
ev_shell_command_call(&self.read_call_id, command),
|
||||
ev_completed("resp-1"),
|
||||
]);
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_string(body)
|
||||
}
|
||||
1 => {
|
||||
let body_json: serde_json::Value =
|
||||
request.body_json().expect("request body should be json");
|
||||
let read_output = function_call_output_text(&body_json, &self.read_call_id);
|
||||
eprintln!("read_output: \n{read_output}");
|
||||
let stdout = stdout_from_shell_output(&read_output);
|
||||
eprintln!("stdout: \n{stdout}");
|
||||
let patch_lines = stdout
|
||||
.lines()
|
||||
.map(|line| format!("+{line}"))
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n");
|
||||
let patch = format!(
|
||||
"*** Begin Patch\n*** Add File: target.txt\n{patch_lines}\n*** End Patch"
|
||||
);
|
||||
|
||||
eprintln!("patch: \n{patch}");
|
||||
|
||||
let body = sse(vec![
|
||||
ev_response_created("resp-2"),
|
||||
ev_apply_patch_custom_tool_call(&self.apply_call_id, &patch),
|
||||
ev_completed("resp-2"),
|
||||
]);
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_string(body)
|
||||
}
|
||||
2 => {
|
||||
let body = sse(vec![
|
||||
ev_assistant_message("msg-1", "ok"),
|
||||
ev_completed("resp-3"),
|
||||
]);
|
||||
ResponseTemplate::new(200)
|
||||
.insert_header("content-type", "text/event-stream")
|
||||
.set_body_string(body)
|
||||
}
|
||||
_ => panic!("no response for call {call_num}"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let responder = DynamicApplyFromRead {
|
||||
num_calls: AtomicI32::new(0),
|
||||
read_call_id: read_call_id.to_string(),
|
||||
apply_call_id: apply_call_id.to_string(),
|
||||
};
|
||||
Mock::given(method("POST"))
|
||||
.and(path_regex(".*/responses$"))
|
||||
.respond_with(responder)
|
||||
.expect(3)
|
||||
.mount(harness.server())
|
||||
.await;
|
||||
|
||||
harness
|
||||
.submit("read source.txt, then apply it to target.txt")
|
||||
.await?;
|
||||
|
||||
let target_contents = fs::read_to_string(harness.path("target.txt"))?;
|
||||
assert_eq!(target_contents, source_contents);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn apply_patch_shell_command_heredoc_with_cd_emits_turn_diff() -> Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
use anyhow::Result;
|
||||
use codex_core::features::Feature;
|
||||
use core_test_support::assert_regex_match;
|
||||
use core_test_support::responses::ev_assistant_message;
|
||||
use core_test_support::responses::ev_completed;
|
||||
|
|
@ -12,6 +13,7 @@ use core_test_support::test_codex::TestCodexBuilder;
|
|||
use core_test_support::test_codex::TestCodexHarness;
|
||||
use core_test_support::test_codex::test_codex;
|
||||
use serde_json::json;
|
||||
use test_case::test_case;
|
||||
|
||||
fn shell_responses_with_timeout(
|
||||
call_id: &str,
|
||||
|
|
@ -201,7 +203,6 @@ async fn shell_command_times_out_with_timeout_ms() -> anyhow::Result<()> {
|
|||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| builder.with_model("gpt-5.1")).await?;
|
||||
|
||||
let call_id = "shell-command-timeout";
|
||||
let command = if cfg!(windows) {
|
||||
"timeout /t 5"
|
||||
|
|
@ -224,3 +225,61 @@ async fn shell_command_times_out_with_timeout_ms() -> anyhow::Result<()> {
|
|||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[test_case(true ; "with_login")]
|
||||
#[test_case(false ; "without_login")]
|
||||
async fn unicode_output(login: bool) -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| {
|
||||
builder.with_model("gpt-5.2").with_config(|config| {
|
||||
config.features.enable(Feature::PowershellUtf8);
|
||||
})
|
||||
})
|
||||
.await?;
|
||||
|
||||
let call_id = "unicode_output";
|
||||
mount_shell_responses(
|
||||
&harness,
|
||||
call_id,
|
||||
"git -c alias.say='!printf \"%s\" \"naïve_café\"' say",
|
||||
Some(login),
|
||||
)
|
||||
.await;
|
||||
harness.submit("run the command without login").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "naïve_café")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
#[test_case(true ; "with_login")]
|
||||
#[test_case(false ; "without_login")]
|
||||
async fn unicode_output_with_newlines(login: bool) -> anyhow::Result<()> {
|
||||
skip_if_no_network!(Ok(()));
|
||||
|
||||
let harness = shell_command_harness_with(|builder| {
|
||||
builder.with_model("gpt-5.2").with_config(|config| {
|
||||
config.features.enable(Feature::PowershellUtf8);
|
||||
})
|
||||
})
|
||||
.await?;
|
||||
|
||||
let call_id = "unicode_output";
|
||||
mount_shell_responses(
|
||||
&harness,
|
||||
call_id,
|
||||
"echo 'line1\nnaïve café\nline3'",
|
||||
Some(login),
|
||||
)
|
||||
.await;
|
||||
harness.submit("run the command without login").await?;
|
||||
|
||||
let output = harness.function_call_stdout(call_id).await;
|
||||
assert_shell_command_output(&output, "line1\\nnaïve café\\nline3")?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue