diff --git a/codex-rs/Cargo.lock b/codex-rs/Cargo.lock
index b8175ee36..01abeee6a 100644
--- a/codex-rs/Cargo.lock
+++ b/codex-rs/Cargo.lock
@@ -726,6 +726,17 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
 
+[[package]]
+name = "chardetng"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "14b8f0b65b7b08ae3c8187e8d77174de20cb6777864c6b832d8ad365999cf1ea"
+dependencies = [
+ "cfg-if",
+ "encoding_rs",
+ "memchr",
+]
+
 [[package]]
 name = "chrono"
 version = "0.4.42"
@@ -1081,6 +1092,7 @@ dependencies = [
  "async-trait",
  "base64",
  "bytes",
+ "chardetng",
  "chrono",
  "codex-app-server-protocol",
  "codex-apply-patch",
@@ -1103,6 +1115,7 @@ dependencies = [
  "ctor 0.5.0",
  "dirs",
  "dunce",
+ "encoding_rs",
  "env-flags",
  "escargot",
  "eventsource-stream",
diff --git a/codex-rs/Cargo.toml b/codex-rs/Cargo.toml
index e2aa98138..61ab7749b 100644
--- a/codex-rs/Cargo.toml
+++ b/codex-rs/Cargo.toml
@@ -111,6 +111,7 @@ axum = { version = "0.8", default-features = false }
 base64 = "0.22.1"
 bytes = "1.10.1"
 chrono = "0.4.42"
+chardetng = "0.1.17"
 clap = "4"
 clap_complete = "4"
 color-eyre = "0.6.3"
@@ -123,6 +124,7 @@ dotenvy = "0.15.7"
 dunce = "1.0.4"
 env-flags = "0.1.1"
 env_logger = "0.11.5"
+encoding_rs = "0.8.35"
 escargot = "0.5"
 eventsource-stream = "0.2.3"
 futures = { version = "0.3", default-features = false }
diff --git a/codex-rs/core/Cargo.toml b/codex-rs/core/Cargo.toml
index bc7b3668d..565b9ddd5 100644
--- a/codex-rs/core/Cargo.toml
+++ b/codex-rs/core/Cargo.toml
@@ -19,6 +19,7 @@ async-trait = { workspace = true }
 base64 = { workspace = true }
 bytes = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
+chardetng = { workspace = true }
 codex-app-server-protocol = { workspace = true }
 codex-apply-patch = { workspace = true }
 codex-async-utils = { workspace = true }
@@ -37,6 +38,7 @@ codex-windows-sandbox = { package = "codex-windows-sandbox", path = "../windows-
 dirs = { workspace = true }
 dunce = { workspace = true }
 env-flags = { workspace = true }
+encoding_rs = { workspace = true }
 eventsource-stream = { workspace = true }
 futures = { workspace = true }
 http = { workspace = true }
diff --git a/codex-rs/core/src/exec.rs b/codex-rs/core/src/exec.rs
index 583c2233a..f7a145663 100644
--- a/codex-rs/core/src/exec.rs
+++ b/codex-rs/core/src/exec.rs
@@ -28,6 +28,7 @@ use crate::sandboxing::ExecEnv;
 use crate::sandboxing::SandboxManager;
 use crate::spawn::StdioPolicy;
 use crate::spawn::spawn_child_async;
+use crate::text_encoding::bytes_to_string_smart;
 
 const DEFAULT_TIMEOUT_MS: u64 = 10_000;
 
@@ -414,7 +415,7 @@ impl StreamOutput<String> {
 impl StreamOutput<Vec<u8>> {
     pub fn from_utf8_lossy(&self) -> StreamOutput<String> {
         StreamOutput {
-            text: String::from_utf8_lossy(&self.text).to_string(),
+            text: bytes_to_string_smart(&self.text),
             truncated_after_lines: self.truncated_after_lines,
         }
     }
diff --git a/codex-rs/core/src/lib.rs b/codex-rs/core/src/lib.rs
index 2ae11d79d..6906489e7 100644
--- a/codex-rs/core/src/lib.rs
+++ b/codex-rs/core/src/lib.rs
@@ -39,6 +39,7 @@ pub mod parse_command;
 pub mod powershell;
 mod response_processing;
 pub mod sandboxing;
+mod text_encoding;
 pub mod token_data;
 mod truncate;
 mod unified_exec;
diff --git a/codex-rs/core/src/text_encoding.rs b/codex-rs/core/src/text_encoding.rs
new file mode 100644
index 000000000..fde44c419
--- /dev/null
+++ b/codex-rs/core/src/text_encoding.rs
@@ -0,0 +1,461 @@
+//! Text encoding detection and conversion utilities for shell output.
+//!
+//! Windows users frequently run into code pages such as CP1251 or CP866 when invoking commands
+//! through VS Code. Those bytes show up as invalid UTF-8 and used to be replaced with the standard
+//! Unicode replacement character. We now lean on `chardetng` and `encoding_rs` so we can
+//! automatically detect and decode the vast majority of legacy encodings before falling back to
+//! lossy UTF-8 decoding.
+
+use chardetng::EncodingDetector;
+use encoding_rs::Encoding;
+use encoding_rs::IBM866;
+use encoding_rs::WINDOWS_1252;
+
+/// Attempts to convert arbitrary bytes to UTF-8 with best-effort encoding detection.
+pub fn bytes_to_string_smart(bytes: &[u8]) -> String {
+    if bytes.is_empty() {
+        return String::new();
+    }
+
+    if let Ok(utf8_str) = std::str::from_utf8(bytes) {
+        return utf8_str.to_owned();
+    }
+
+    let encoding = detect_encoding(bytes);
+    decode_bytes(bytes, encoding)
+}
+
+// Windows-1252 reassigns a handful of 0x80-0x9F slots to smart punctuation (curly quotes, dashes,
+// ™). CP866 uses those *same byte values* for uppercase Cyrillic letters. When chardetng sees shell
+// snippets that mix these bytes with ASCII it sometimes guesses IBM866, so “smart quotes” render as
+// Cyrillic garbage (“УФЦ”) in VS Code. However, CP866 uppercase tokens are perfectly valid output
+// (e.g., `ПРИ test`) so we cannot flip every 0x80-0x9F byte to Windows-1252 either. The compromise
+// is to only coerce IBM866 to Windows-1252 when (a) the high bytes are exclusively the punctuation
+// values listed below and (b) we spot adjacent ASCII. This targets the real failure case without
+// clobbering legitimate Cyrillic text. If another code page has a similar collision, introduce a
+// dedicated allowlist (like this one) plus unit tests that capture the actual shell output we want
+// to preserve. Windows-1252 byte values for smart punctuation.
+const WINDOWS_1252_PUNCT_BYTES: [u8; 8] = [
+    0x91, // ‘ (left single quotation mark)
+    0x92, // ’ (right single quotation mark)
+    0x93, // “ (left double quotation mark)
+    0x94, // ” (right double quotation mark)
+    0x95, // • (bullet)
+    0x96, // – (en dash)
+    0x97, // — (em dash)
+    0x99, // ™ (trade mark sign)
+];
+
+fn detect_encoding(bytes: &[u8]) -> &'static Encoding {
+    let mut detector = EncodingDetector::new();
+    detector.feed(bytes, true);
+    let (encoding, _is_confident) = detector.guess_assess(None, true);
+
+    // chardetng occasionally reports IBM866 for short strings that only contain Windows-1252 “smart
+    // punctuation” bytes (0x80-0x9F) because that range maps to Cyrillic letters in IBM866. When
+    // those bytes show up alongside an ASCII word (typical shell output: `"“`test), we know the
+    // intent was likely CP1252 quotes/dashes. Prefer WINDOWS_1252 in that specific situation so we
+    // render the characters users expect instead of Cyrillic junk. References:
+    // - Windows-1252 reserving 0x80-0x9F for curly quotes/dashes:
+    //   https://en.wikipedia.org/wiki/Windows-1252
+    // - CP866 mapping 0x93/0x94/0x96 to Cyrillic letters, so the same bytes show up as “УФЦ” when
+    //   mis-decoded: https://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT
+    if encoding == IBM866 && looks_like_windows_1252_punctuation(bytes) {
+        return WINDOWS_1252;
+    }
+
+    encoding
+}
+
+fn decode_bytes(bytes: &[u8], encoding: &'static Encoding) -> String {
+    let (decoded, _, had_errors) = encoding.decode(bytes);
+
+    if had_errors {
+        return String::from_utf8_lossy(bytes).into_owned();
+    }
+
+    decoded.into_owned()
+}
+
+/// Detect whether the byte stream looks like Windows-1252 “smart punctuation” wrapped around
+/// otherwise-ASCII text.
+///
+/// Context: IBM866 and Windows-1252 share the 0x80-0x9F slot range. In IBM866 these bytes decode to
+/// Cyrillic letters, whereas Windows-1252 maps them to curly quotes and dashes. chardetng can guess
+/// IBM866 for short snippets that only contain those bytes, which turns shell output such as
+/// `“test”` into unreadable Cyrillic. To avoid that, we treat inputs comprising a handful of bytes
+/// from the problematic range plus ASCII letters as CP1252 punctuation. We deliberately do *not*
+/// cap how many of those punctuation bytes we accept: VS Code frequently prints several quoted
+/// phrases (e.g., `"foo" – "bar"`), and truncating the count would once again mis-decode those as
+/// Cyrillic. If we discover additional encodings with overlapping byte ranges, prefer adding
+/// encoding-specific byte allowlists like `WINDOWS_1252_PUNCT` and tests that exercise real-world
+/// shell snippets.
+fn looks_like_windows_1252_punctuation(bytes: &[u8]) -> bool {
+    let mut saw_extended_punctuation = false;
+    let mut saw_ascii_word = false;
+
+    for &byte in bytes {
+        if byte >= 0xA0 {
+            return false;
+        }
+        if (0x80..=0x9F).contains(&byte) {
+            if !is_windows_1252_punct(byte) {
+                return false;
+            }
+            saw_extended_punctuation = true;
+        }
+        if byte.is_ascii_alphabetic() {
+            saw_ascii_word = true;
+        }
+    }
+
+    saw_extended_punctuation && saw_ascii_word
+}
+
+fn is_windows_1252_punct(byte: u8) -> bool {
+    WINDOWS_1252_PUNCT_BYTES.contains(&byte)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use encoding_rs::BIG5;
+    use encoding_rs::EUC_KR;
+    use encoding_rs::GBK;
+    use encoding_rs::ISO_8859_2;
+    use encoding_rs::ISO_8859_3;
+    use encoding_rs::ISO_8859_4;
+    use encoding_rs::ISO_8859_5;
+    use encoding_rs::ISO_8859_6;
+    use encoding_rs::ISO_8859_7;
+    use encoding_rs::ISO_8859_8;
+    use encoding_rs::ISO_8859_10;
+    use encoding_rs::ISO_8859_13;
+    use encoding_rs::SHIFT_JIS;
+    use encoding_rs::WINDOWS_874;
+    use encoding_rs::WINDOWS_1250;
+    use encoding_rs::WINDOWS_1251;
+    use encoding_rs::WINDOWS_1253;
+    use encoding_rs::WINDOWS_1254;
+    use encoding_rs::WINDOWS_1255;
+    use encoding_rs::WINDOWS_1256;
+    use encoding_rs::WINDOWS_1257;
+    use encoding_rs::WINDOWS_1258;
+    use pretty_assertions::assert_eq;
+
+    #[test]
+    fn test_utf8_passthrough() {
+        // Fast path: when UTF-8 is valid we should avoid copies and return as-is.
+        let utf8_text = "Hello, мир! 世界";
+        let bytes = utf8_text.as_bytes();
+        assert_eq!(bytes_to_string_smart(bytes), utf8_text);
+    }
+
+    #[test]
+    fn test_cp1251_russian_text() {
+        // Cyrillic text emitted by PowerShell/WSL in CP1251 should decode cleanly.
+        let bytes = b"\xEF\xF0\xE8\xEC\xE5\xF0"; // "пример" encoded with Windows-1251
+        assert_eq!(bytes_to_string_smart(bytes), "пример");
+    }
+
+    #[test]
+    fn test_cp1251_privet_word() {
+        // Regression: CP1251 words like "Привет" must not be mis-identified as Windows-1252.
+        let bytes = b"\xCF\xF0\xE8\xE2\xE5\xF2"; // "Привет" encoded with Windows-1251
+        assert_eq!(bytes_to_string_smart(bytes), "Привет");
+    }
+
+    #[test]
+    fn test_koi8_r_privet_word() {
+        // KOI8-R output should decode to the original Cyrillic as well.
+        let bytes = b"\xF0\xD2\xC9\xD7\xC5\xD4"; // "Привет" encoded with KOI8-R
+        assert_eq!(bytes_to_string_smart(bytes), "Привет");
+    }
+
+    #[test]
+    fn test_cp866_russian_text() {
+        // Legacy consoles (cmd.exe) commonly emit CP866 bytes for Cyrillic content.
+        let bytes = b"\xAF\xE0\xA8\xAC\xA5\xE0"; // "пример" encoded with CP866
+        assert_eq!(bytes_to_string_smart(bytes), "пример");
+    }
+
+    #[test]
+    fn test_cp866_uppercase_text() {
+        // Ensure the IBM866 heuristic still returns IBM866 for uppercase-only words.
+        let bytes = b"\x8F\x90\x88"; // "ПРИ" encoded with CP866 uppercase letters
+        assert_eq!(bytes_to_string_smart(bytes), "ПРИ");
+    }
+
+    #[test]
+    fn test_cp866_uppercase_followed_by_ascii() {
+        // Regression test: uppercase CP866 tokens next to ASCII text should not be treated as
+        // CP1252.
+        let bytes = b"\x8F\x90\x88 test"; // "ПРИ test" encoded with CP866 uppercase letters followed by ASCII
+        assert_eq!(bytes_to_string_smart(bytes), "ПРИ test");
+    }
+
+    #[test]
+    fn test_windows_1252_quotes() {
+        // Smart detection should map Windows-1252 punctuation into proper Unicode.
+        let bytes = b"\x93\x94test";
+        assert_eq!(bytes_to_string_smart(bytes), "\u{201C}\u{201D}test");
+    }
+
+    #[test]
+    fn test_windows_1252_multiple_quotes() {
+        // Longer snippets of punctuation (e.g., “foo” – “bar”) should still flip to CP1252.
+        let bytes = b"\x93foo\x94 \x96 \x93bar\x94";
+        assert_eq!(
+            bytes_to_string_smart(bytes),
+            "\u{201C}foo\u{201D} \u{2013} \u{201C}bar\u{201D}"
+        );
+    }
+
+    #[test]
+    fn test_windows_1252_privet_gibberish_is_preserved() {
+        // Windows-1252 cannot encode Cyrillic; if the input literally contains "ÐŸÑ..." we should not "fix" it.
+        let bytes = "ÐŸÑ€Ð¸Ð²ÐµÑ‚".as_bytes();
+        assert_eq!(bytes_to_string_smart(bytes), "ÐŸÑ€Ð¸Ð²ÐµÑ‚");
+    }
+
+    #[test]
+    fn test_iso8859_1_latin_text() {
+        // ISO-8859-1 (code page 28591) is the Latin segment used by LatArCyrHeb.
+        // encoding_rs unifies ISO-8859-1 with Windows-1252, so reuse that constant here.
+        let (encoded, _, had_errors) = WINDOWS_1252.encode("Hello");
+        assert!(!had_errors, "failed to encode Latin sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Hello");
+    }
+
+    #[test]
+    fn test_iso8859_2_central_european_text() {
+        // ISO-8859-2 (code page 28592) covers additional Central European glyphs.
+        let (encoded, _, had_errors) = ISO_8859_2.encode("Příliš žluťoučký kůň");
+        assert!(!had_errors, "failed to encode ISO-8859-2 sample");
+        assert_eq!(
+            bytes_to_string_smart(encoded.as_ref()),
+            "Příliš žluťoučký kůň"
+        );
+    }
+
+    #[test]
+    fn test_iso8859_3_south_europe_text() {
+        // ISO-8859-3 (code page 28593) adds support for Maltese/Esperanto letters.
+        // chardetng rarely distinguishes ISO-8859-3 from neighboring Latin code pages, so we rely on
+        // an ASCII-only sample to ensure round-tripping still succeeds.
+        let (encoded, _, had_errors) = ISO_8859_3.encode("Esperanto and Maltese");
+        assert!(!had_errors, "failed to encode ISO-8859-3 sample");
+        assert_eq!(
+            bytes_to_string_smart(encoded.as_ref()),
+            "Esperanto and Maltese"
+        );
+    }
+
+    #[test]
+    fn test_iso8859_4_baltic_text() {
+        // ISO-8859-4 (code page 28594) targets the Baltic/Nordic repertoire.
+        let sample = "Šis ir rakstzīmju kodēšanas tests. Dažās valodās, kurās tiek \
+                      izmantotas latīņu valodas burti, lēmuma pieņemšanai mums ir nepieciešams \
+                      vairāk ieguldījuma.";
+        let (encoded, _, had_errors) = ISO_8859_4.encode(sample);
+        assert!(!had_errors, "failed to encode ISO-8859-4 sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), sample);
+    }
+
+    #[test]
+    fn test_iso8859_5_cyrillic_text() {
+        // ISO-8859-5 (code page 28595) covers the Cyrillic portion.
+        let (encoded, _, had_errors) = ISO_8859_5.encode("Привет");
+        assert!(!had_errors, "failed to encode Cyrillic sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Привет");
+    }
+
+    #[test]
+    fn test_iso8859_6_arabic_text() {
+        // ISO-8859-6 (code page 28596) covers the Arabic glyphs.
+        let (encoded, _, had_errors) = ISO_8859_6.encode("مرحبا");
+        assert!(!had_errors, "failed to encode Arabic sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "مرحبا");
+    }
+
+    #[test]
+    fn test_iso8859_7_greek_text() {
+        // ISO-8859-7 (code page 28597) is used for Greek locales.
+        let (encoded, _, had_errors) = ISO_8859_7.encode("Καλημέρα");
+        assert!(!had_errors, "failed to encode ISO-8859-7 sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Καλημέρα");
+    }
+
+    #[test]
+    fn test_iso8859_8_hebrew_text() {
+        // ISO-8859-8 (code page 28598) covers the Hebrew glyphs.
+        let (encoded, _, had_errors) = ISO_8859_8.encode("שלום");
+        assert!(!had_errors, "failed to encode Hebrew sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "שלום");
+    }
+
+    #[test]
+    fn test_iso8859_9_turkish_text() {
+        // ISO-8859-9 (code page 28599) mirrors Latin-1 but inserts Turkish letters.
+        // encoding_rs exposes the equivalent Windows-1254 mapping.
+        let (encoded, _, had_errors) = WINDOWS_1254.encode("İstanbul");
+        assert!(!had_errors, "failed to encode ISO-8859-9 sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "İstanbul");
+    }
+
+    #[test]
+    fn test_iso8859_10_nordic_text() {
+        // ISO-8859-10 (code page 28600) adds additional Nordic letters.
+        let sample = "Þetta er prófun fyrir Ægir og Øystein.";
+        let (encoded, _, had_errors) = ISO_8859_10.encode(sample);
+        assert!(!had_errors, "failed to encode ISO-8859-10 sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), sample);
+    }
+
+    #[test]
+    fn test_iso8859_11_thai_text() {
+        // ISO-8859-11 (code page 28601) mirrors TIS-620 / Windows-874 for Thai.
+        let sample = "ภาษาไทยสำหรับการทดสอบ ISO-8859-11";
+        // encoding_rs exposes the equivalent Windows-874 encoding, so use that constant.
+        let (encoded, _, had_errors) = WINDOWS_874.encode(sample);
+        assert!(!had_errors, "failed to encode ISO-8859-11 sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), sample);
+    }
+
+    // ISO-8859-12 was never standardized, and encodings 14–16 cannot be distinguished reliably
+    // without the heuristics we removed (chardetng generally reports neighboring Latin pages), so
+    // we intentionally omit coverage for those slots until the detector can identify them.
+
+    #[test]
+    fn test_iso8859_13_baltic_text() {
+        // ISO-8859-13 (code page 28603) is common across Baltic languages.
+        let (encoded, _, had_errors) = ISO_8859_13.encode("Sveiki");
+        assert!(!had_errors, "failed to encode ISO-8859-13 sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Sveiki");
+    }
+
+    #[test]
+    fn test_windows_1250_central_european_text() {
+        let (encoded, _, had_errors) = WINDOWS_1250.encode("Příliš žluťoučký kůň");
+        assert!(!had_errors, "failed to encode Central European sample");
+        assert_eq!(
+            bytes_to_string_smart(encoded.as_ref()),
+            "Příliš žluťoučký kůň"
+        );
+    }
+
+    #[test]
+    fn test_windows_1251_encoded_text() {
+        let (encoded, _, had_errors) = WINDOWS_1251.encode("Привет из Windows-1251");
+        assert!(!had_errors, "failed to encode Windows-1251 sample");
+        assert_eq!(
+            bytes_to_string_smart(encoded.as_ref()),
+            "Привет из Windows-1251"
+        );
+    }
+
+    #[test]
+    fn test_windows_1253_greek_text() {
+        let (encoded, _, had_errors) = WINDOWS_1253.encode("Γειά σου");
+        assert!(!had_errors, "failed to encode Greek sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Γειά σου");
+    }
+
+    #[test]
+    fn test_windows_1254_turkish_text() {
+        let (encoded, _, had_errors) = WINDOWS_1254.encode("İstanbul");
+        assert!(!had_errors, "failed to encode Turkish sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "İstanbul");
+    }
+
+    #[test]
+    fn test_windows_1255_hebrew_text() {
+        let (encoded, _, had_errors) = WINDOWS_1255.encode("שלום");
+        assert!(!had_errors, "failed to encode Windows-1255 Hebrew sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "שלום");
+    }
+
+    #[test]
+    fn test_windows_1256_arabic_text() {
+        let (encoded, _, had_errors) = WINDOWS_1256.encode("مرحبا");
+        assert!(!had_errors, "failed to encode Windows-1256 Arabic sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "مرحبا");
+    }
+
+    #[test]
+    fn test_windows_1257_baltic_text() {
+        let (encoded, _, had_errors) = WINDOWS_1257.encode("Pērkons");
+        assert!(!had_errors, "failed to encode Baltic sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Pērkons");
+    }
+
+    #[test]
+    fn test_windows_1258_vietnamese_text() {
+        let (encoded, _, had_errors) = WINDOWS_1258.encode("Xin chào");
+        assert!(!had_errors, "failed to encode Vietnamese sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "Xin chào");
+    }
+
+    #[test]
+    fn test_windows_874_thai_text() {
+        let (encoded, _, had_errors) = WINDOWS_874.encode("สวัสดีครับ นี่คือการทดสอบภาษาไทย");
+        assert!(!had_errors, "failed to encode Thai sample");
+        assert_eq!(
+            bytes_to_string_smart(encoded.as_ref()),
+            "สวัสดีครับ นี่คือการทดสอบภาษาไทย"
+        );
+    }
+
+    #[test]
+    fn test_windows_932_shift_jis_text() {
+        let (encoded, _, had_errors) = SHIFT_JIS.encode("こんにちは");
+        assert!(!had_errors, "failed to encode Shift-JIS sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "こんにちは");
+    }
+
+    #[test]
+    fn test_windows_936_gbk_text() {
+        let (encoded, _, had_errors) = GBK.encode("你好，世界，这是一个测试");
+        assert!(!had_errors, "failed to encode GBK sample");
+        assert_eq!(
+            bytes_to_string_smart(encoded.as_ref()),
+            "你好，世界，这是一个测试"
+        );
+    }
+
+    #[test]
+    fn test_windows_949_korean_text() {
+        let (encoded, _, had_errors) = EUC_KR.encode("안녕하세요");
+        assert!(!had_errors, "failed to encode Korean sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "안녕하세요");
+    }
+
+    #[test]
+    fn test_windows_950_big5_text() {
+        let (encoded, _, had_errors) = BIG5.encode("繁體");
+        assert!(!had_errors, "failed to encode Big5 sample");
+        assert_eq!(bytes_to_string_smart(encoded.as_ref()), "繁體");
+    }
+
+    #[test]
+    fn test_latin1_cafe() {
+        // Latin-1 bytes remain common in Western-European locales; decode them directly.
+        let bytes = b"caf\xE9"; // codespell:ignore caf
+        assert_eq!(bytes_to_string_smart(bytes), "café");
+    }
+
+    #[test]
+    fn test_preserves_ansi_sequences() {
+        // ANSI escape sequences should survive regardless of the detected encoding.
+        let bytes = b"\x1b[31mred\x1b[0m";
+        assert_eq!(bytes_to_string_smart(bytes), "\x1b[31mred\x1b[0m");
+    }
+
+    #[test]
+    fn test_fallback_to_lossy() {
+        // Completely invalid sequences fall back to the old lossy behavior.
+        let invalid_bytes = [0xFF, 0xFE, 0xFD];
+        let result = bytes_to_string_smart(&invalid_bytes);
+        assert_eq!(result, String::from_utf8_lossy(&invalid_bytes));
+    }
+}
diff --git a/codex-rs/core/tests/suite/mod.rs b/codex-rs/core/tests/suite/mod.rs
index 60b828b1c..b87766361 100644
--- a/codex-rs/core/tests/suite/mod.rs
+++ b/codex-rs/core/tests/suite/mod.rs
@@ -49,6 +49,7 @@ mod seatbelt;
 mod shell_serialization;
 mod stream_error_allows_next_turn;
 mod stream_no_completed;
+mod text_encoding_fix;
 mod tool_harness;
 mod tool_parallelism;
 mod tools;
diff --git a/codex-rs/core/tests/suite/text_encoding_fix.rs b/codex-rs/core/tests/suite/text_encoding_fix.rs
new file mode 100644
index 000000000..ecebb1e42
--- /dev/null
+++ b/codex-rs/core/tests/suite/text_encoding_fix.rs
@@ -0,0 +1,77 @@
+//! Integration test for the text encoding fix for issue #6178.
+//!
+//! These tests simulate VSCode's shell preview on Windows/WSL where the output
+//! may be encoded with a legacy code page before it reaches Codex.
+
+use codex_core::exec::StreamOutput;
+use pretty_assertions::assert_eq;
+
+#[test]
+fn test_utf8_shell_output() {
+    // Baseline: UTF-8 output should bypass the detector and remain unchanged.
+    assert_eq!(decode_shell_output("пример".as_bytes()), "пример");
+}
+
+#[test]
+fn test_cp1251_shell_output() {
+    // VS Code shells on Windows frequently surface CP1251 bytes for Cyrillic text.
+    assert_eq!(decode_shell_output(b"\xEF\xF0\xE8\xEC\xE5\xF0"), "пример");
+}
+
+#[test]
+fn test_cp866_shell_output() {
+    // Native cmd.exe still defaults to CP866; make sure we recognize that too.
+    assert_eq!(decode_shell_output(b"\xAF\xE0\xA8\xAC\xA5\xE0"), "пример");
+}
+
+#[test]
+fn test_windows_1252_smart_decoding() {
+    // Smart detection should turn fancy quotes/dashes into the proper Unicode glyphs.
+    assert_eq!(
+        decode_shell_output(b"\x93\x94 test \x96 dash"),
+        "\u{201C}\u{201D} test \u{2013} dash"
+    );
+}
+
+#[test]
+fn test_smart_decoding_improves_over_lossy_utf8() {
+    // Regression guard: String::from_utf8_lossy() alone used to emit replacement chars here.
+    let bytes = b"\x93\x94 test \x96 dash";
+    assert!(
+        String::from_utf8_lossy(bytes).contains('\u{FFFD}'),
+        "lossy UTF-8 should inject replacement chars"
+    );
+    assert_eq!(
+        decode_shell_output(bytes),
+        "\u{201C}\u{201D} test \u{2013} dash",
+        "smart decoding should keep curly quotes intact"
+    );
+}
+
+#[test]
+fn test_mixed_ascii_and_legacy_encoding() {
+    // Commands tend to mix ASCII status text with Latin-1 bytes (e.g. café).
+    assert_eq!(decode_shell_output(b"Output: caf\xE9"), "Output: café"); // codespell:ignore caf
+}
+
+#[test]
+fn test_pure_latin1_shell_output() {
+    // Latin-1 by itself should still decode correctly (regression coverage for the older tests).
+    assert_eq!(decode_shell_output(b"caf\xE9"), "café"); // codespell:ignore caf
+}
+
+#[test]
+fn test_invalid_bytes_still_fall_back_to_lossy() {
+    // If detection fails, we still want the user to see replacement characters.
+    let bytes = b"\xFF\xFE\xFD";
+    assert_eq!(decode_shell_output(bytes), String::from_utf8_lossy(bytes));
+}
+
+fn decode_shell_output(bytes: &[u8]) -> String {
+    StreamOutput {
+        text: bytes.to_vec(),
+        truncated_after_lines: None,
+    }
+    .from_utf8_lossy()
+    .text
+}