voice transcription (#3381)

Adds voice transcription on press-and-hold of spacebar.


https://github.com/user-attachments/assets/85039314-26f3-46d1-a83b-8c4a4a1ecc21

---------

Co-authored-by: Codex <199175422+chatgpt-codex-connector[bot]@users.noreply.github.com>
Co-authored-by: David Zbarsky <zbarsky@openai.com>
This commit is contained in:
Jeremy Rose 2026-02-23 14:15:18 -08:00 committed by GitHub
parent 50953ea39a
commit 855e275591
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 2538 additions and 446 deletions

View file

@ -1,4 +1,11 @@
load("@apple_support//xcode:xcode_config.bzl", "xcode_config")
load("@rules_cc//cc:defs.bzl", "cc_shared_library")
cc_shared_library(
name = "clang",
deps = ["@llvm-project//clang:libclang"],
visibility = ["//visibility:public"],
)
xcode_config(name = "disable_xcode")

View file

@ -1,5 +1,7 @@
module(name = "codex")
bazel_dep(name = "platforms", version = "1.0.0")
bazel_dep(name = "toolchains_llvm_bootstrapped", version = "0.5.3")
bazel_dep(name = "toolchains_llvm_bootstrapped", version = "0.5.6")
single_version_override(
module_name = "toolchains_llvm_bootstrapped",
patch_strip = 1,
@ -8,6 +10,8 @@ single_version_override(
],
)
register_toolchains("@toolchains_llvm_bootstrapped//toolchain:all")
osx = use_extension("@toolchains_llvm_bootstrapped//extensions:osx.bzl", "osx")
osx.framework(name = "ApplicationServices")
osx.framework(name = "AppKit")
@ -16,8 +20,12 @@ osx.framework(name = "CoreFoundation")
osx.framework(name = "CoreGraphics")
osx.framework(name = "CoreServices")
osx.framework(name = "CoreText")
osx.framework(name = "AudioToolbox")
osx.framework(name = "CFNetwork")
osx.framework(name = "FontServices")
osx.framework(name = "AudioUnit")
osx.framework(name = "CoreAudio")
osx.framework(name = "CoreAudioTypes")
osx.framework(name = "Foundation")
osx.framework(name = "ImageIO")
osx.framework(name = "IOKit")
@ -25,10 +33,7 @@ osx.framework(name = "Kernel")
osx.framework(name = "OSLog")
osx.framework(name = "Security")
osx.framework(name = "SystemConfiguration")
register_toolchains(
"@toolchains_llvm_bootstrapped//toolchain:all",
)
use_repo(osx, "macosx15.4.sdk")
# Needed to disable xcode...
bazel_dep(name = "apple_support", version = "2.1.0")
@ -39,9 +44,9 @@ bazel_dep(name = "rules_rs", version = "0.0.23")
# Special toolchains branch
archive_override(
module_name = "rules_rs",
integrity = "sha256-YbDRjZos4UmfIPY98znK1BgBWRQ1/ui3CtL6RqxE30I=",
strip_prefix = "rules_rs-6cf3d940fdc48baf3ebd6c37daf8e0be8fc73ecb",
url = "https://github.com/dzbarsky/rules_rs/archive/6cf3d940fdc48baf3ebd6c37daf8e0be8fc73ecb.tar.gz",
integrity = "sha256-O34UF4H7b1Qacu3vlu2Od4ILGVApzg5j1zl952SFL3w=",
strip_prefix = "rules_rs-097123c2aa72672e371e69e7035869f5a45c7b2b",
url = "https://github.com/dzbarsky/rules_rs/archive/097123c2aa72672e371e69e7035869f5a45c7b2b.tar.gz",
)
rules_rust = use_extension("@rules_rs//rs/experimental:rules_rust.bzl", "rules_rust")
@ -134,6 +139,9 @@ crate.annotation(
"OPENSSL_NO_VENDOR": "1",
"OPENSSL_STATIC": "1",
},
crate_features = [
"dep:openssl-src",
],
crate = "openssl-sys",
data = ["@openssl//:gen_dir"],
)
@ -145,6 +153,28 @@ crate.annotation(
workspace_cargo_toml = "rust/runfiles/Cargo.toml",
)
llvm = use_extension("@toolchains_llvm_bootstrapped//extensions:llvm.bzl", "llvm")
use_repo(llvm, "llvm-project")
crate.annotation(
# Provide the hermetic SDK path so the build script doesn't try to invoke an unhermetic `xcrun --show-sdk-path`.
build_script_data = [
"@macosx15.4.sdk//sysroot",
],
build_script_env = {
"BINDGEN_EXTRA_CLANG_ARGS": "-isystem $(location @toolchains_llvm_bootstrapped//:builtin_headers)",
"COREAUDIO_SDK_PATH": "$(location @macosx15.4.sdk//sysroot)",
"LIBCLANG_PATH": "$(location @codex//:clang)",
},
build_script_tools = [
"@codex//:clang",
"@toolchains_llvm_bootstrapped//:builtin_headers",
],
crate = "coreaudio-sys",
)
inject_repo(crate, "codex", "toolchains_llvm_bootstrapped", "macosx15.4.sdk")
# Fix readme inclusions
crate.annotation(
crate = "windows-link",
@ -175,6 +205,17 @@ crate.annotation(
gen_build_script = "off",
deps = [":windows_import_lib"],
)
bazel_dep(name = "alsa_lib", version = "1.2.9.bcr.4")
crate.annotation(
crate = "alsa-sys",
gen_build_script = "off",
deps = ["@alsa_lib"],
)
inject_repo(crate, "alsa_lib")
use_repo(crate, "crates")
rbe_platform_repository = use_repo_rule("//:rbe.bzl", "rbe_platform_repository")

278
MODULE.bazel.lock generated

File diff suppressed because one or more lines are too long

924
codex-rs/Cargo.lock generated

File diff suppressed because it is too large Load diff

View file

@ -415,6 +415,9 @@
"use_linux_sandbox_bwrap": {
"type": "boolean"
},
"voice_transcription": {
"type": "boolean"
},
"web_search": {
"type": "boolean"
},
@ -1696,6 +1699,9 @@
"use_linux_sandbox_bwrap": {
"type": "boolean"
},
"voice_transcription": {
"type": "boolean"
},
"web_search": {
"type": "boolean"
},

View file

@ -138,6 +138,8 @@ pub enum Feature {
CollaborationModes,
/// Enable personality selection in the TUI.
Personality,
/// Enable voice transcription in the TUI composer.
VoiceTranscription,
/// Prevent idle system sleep while a turn is actively running.
PreventIdleSleep,
/// Use the Responses API WebSocket transport for OpenAI by default.
@ -627,6 +629,12 @@ pub const FEATURES: &[FeatureSpec] = &[
stage: Stage::Stable,
default_enabled: true,
},
FeatureSpec {
id: Feature::VoiceTranscription,
key: "voice_transcription",
stage: Stage::UnderDevelopment,
default_enabled: false,
},
FeatureSpec {
id: Feature::PreventIdleSleep,
key: "prevent_idle_sleep",

View file

@ -431,7 +431,7 @@ fn build_authorize_url(
("redirect_uri".to_string(), redirect_uri.to_string()),
(
"scope".to_string(),
"openid profile email offline_access".to_string(),
"openid profile email offline_access api.model.audio.request".to_string(),
),
(
"code_challenge".to_string(),

View file

@ -13,10 +13,12 @@ name = "codex_tui"
path = "src/lib.rs"
[features]
default = ["voice-input"]
# Enable vt100-based tests (emulator) when running with `--features vt100-tests`.
vt100-tests = []
# Gate verbose debug logging inside the TUI implementation.
debug-logs = []
voice-input = ["dep:cpal", "dep:hound"]
[lints]
workspace = true
@ -68,7 +70,7 @@ ratatui = { workspace = true, features = [
] }
ratatui-macros = { workspace = true }
regex-lite = { workspace = true }
reqwest = { version = "0.12", features = ["json"] }
reqwest = { workspace = true, features = ["json", "multipart"] }
rmcp = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true, features = ["preserve_order"] }
@ -104,6 +106,10 @@ uuid = { workspace = true }
codex-windows-sandbox = { workspace = true }
tokio-util = { workspace = true, features = ["time"] }
[target.'cfg(not(target_os = "linux"))'.dependencies]
cpal = { version = "0.15", optional = true }
hound = { version = "3.5", optional = true }
[target.'cfg(unix)'.dependencies]
libc = { workspace = true }

View file

@ -1547,6 +1547,8 @@ impl App {
{
return Ok(AppRunControl::Continue);
}
// Allow widgets to process any pending timers before rendering.
self.chat_widget.pre_draw_tick();
tui.draw(
self.chat_widget.desired_height(tui.terminal.size()?.width),
|frame| {
@ -2703,6 +2705,22 @@ impl App {
));
}
},
#[cfg(not(target_os = "linux"))]
AppEvent::TranscriptionComplete { id, text } => {
self.chat_widget.replace_transcription(&id, &text);
}
#[cfg(not(target_os = "linux"))]
AppEvent::TranscriptionFailed { id, error: _ } => {
self.chat_widget.remove_transcription_placeholder(&id);
}
#[cfg(not(target_os = "linux"))]
AppEvent::UpdateRecordingMeter { id, text } => {
// Update in place to preserve the element id for subsequent frames.
let updated = self.chat_widget.update_transcription_in_place(&id, &text);
if updated {
tui.frame_requester().schedule_frame();
}
}
AppEvent::StatusLineSetup { items } => {
let ids = items.iter().map(ToString::to_string).collect::<Vec<_>>();
let edit = codex_core::config::edit::status_line_items_edit(&ids);
@ -3106,7 +3124,7 @@ impl App {
self.chat_widget.handle_key_event(key_event);
}
_ => {
// Forward remaining event kinds (e.g. Release) to the widget; voice
// hold-to-talk relies on observing Space releases.
self.chat_widget.handle_key_event(key_event);
}
};
}

View file

@ -321,6 +321,29 @@ pub(crate) enum AppEvent {
/// Re-open the permissions presets popup.
OpenPermissionsPopup,
/// Live update for the in-progress voice recording placeholder. Carries
/// the placeholder `id` and the text to display (e.g., an ASCII meter).
#[cfg(not(target_os = "linux"))]
UpdateRecordingMeter {
id: String,
text: String,
},
/// Voice transcription finished for the given placeholder id.
#[cfg(not(target_os = "linux"))]
TranscriptionComplete {
id: String,
text: String,
},
/// Voice transcription failed; remove the placeholder identified by `id`.
#[cfg(not(target_os = "linux"))]
TranscriptionFailed {
id: String,
#[allow(dead_code)]
error: String,
},
/// Open the branch picker option from the review popup.
OpenReviewBranchPicker(PathBuf),

View file

@ -109,6 +109,17 @@
//! edits and renders a placeholder prompt instead of the editable textarea. This is part of the
//! overall state machine, since it affects which transitions are even possible from a given UI
//! state.
//!
//! # Voice Hold-To-Talk Without Key Release
//!
//! On terminals that do not report `KeyEventKind::Release`, space hold-to-talk uses repeated
//! space key events as "still held" evidence:
//!
//! - For pending holds (non-empty composer), if timeout elapses without any repeated space event,
//! we treat the key as a normal typed space.
//! - If repeated space events are seen before timeout, we proceed with hold-to-talk.
//! - While recording, repeated space events keep the recording alive; if they stop for a short
//! window, we stop and transcribe.
use crate::bottom_pane::footer::mode_indicator_line;
use crate::key_hint;
use crate::key_hint::KeyBinding;
@ -191,6 +202,7 @@ use crate::bottom_pane::textarea::TextAreaState;
use crate::clipboard_paste::normalize_pasted_path;
use crate::clipboard_paste::pasted_image_format;
use crate::history_cell;
use crate::tui::FrameRequester;
use crate::ui_consts::LIVE_PREFIX_COLS;
use codex_chatgpt::connectors;
use codex_chatgpt::connectors::AppInfo;
@ -202,9 +214,17 @@ use std::collections::HashSet;
use std::collections::VecDeque;
use std::ops::Range;
use std::path::PathBuf;
use std::sync::Arc;
#[cfg(not(target_os = "linux"))]
use std::sync::Mutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering;
#[cfg(not(target_os = "linux"))]
use std::thread;
use std::time::Duration;
use std::time::Instant;
#[cfg(not(target_os = "linux"))]
use tokio::runtime::Handle;
/// If the pasted content exceeds this number of characters, replace it with a
/// placeholder in the UI.
const LARGE_PASTE_CHAR_THRESHOLD: usize = 1000;
@ -284,6 +304,35 @@ impl ChatComposerConfig {
}
}
}
/// Per-composer state for spacebar hold-to-talk and voice capture.
///
/// The `space_hold_*` fields track a *pending* hold (the window after a Space
/// press, before we commit to either a typed space or a recording); the
/// `cfg`-gated fields track an *active* recording.
#[derive(Default)]
struct VoiceState {
    /// Feature flag; when false all hold-to-talk handling is bypassed.
    transcription_enabled: bool,
    // Spacebar hold-to-talk state.
    /// When the pending Space hold began; `None` when no hold is pending.
    space_hold_started_at: Option<Instant>,
    /// Id of the temporary textarea element inserted for the held Space.
    space_hold_element_id: Option<String>,
    /// Flag flipped by the delayed timer task once the hold timeout elapses.
    space_hold_trigger: Option<Arc<AtomicBool>>,
    /// True once any `KeyEventKind::Release` has been observed (or the
    /// terminal advertised enhanced keys at construction time).
    key_release_supported: bool,
    /// On terminals without release events: whether a repeated Space event was
    /// seen while the hold was pending ("still held" evidence).
    space_hold_repeat_seen: bool,
    /// Active capture session, if recording.
    #[cfg(not(target_os = "linux"))]
    voice: Option<crate::voice::VoiceCapture>,
    /// Id of the live-meter placeholder element shown while recording.
    #[cfg(not(target_os = "linux"))]
    recording_placeholder_id: Option<String>,
    /// When the current recording started; only populated when release events
    /// are unsupported and repeat-based keepalive is in effect.
    #[cfg(not(target_os = "linux"))]
    space_recording_started_at: Option<Instant>,
    /// Last time a repeated Space event kept the recording alive.
    #[cfg(not(target_os = "linux"))]
    space_recording_last_repeat_at: Option<Instant>,
}
impl VoiceState {
    /// Build a fresh state, seeding `key_release_supported` from the caller's
    /// knowledge of the terminal's key-reporting capabilities.
    fn new(key_release_supported: bool) -> Self {
        Self {
            key_release_supported,
            ..Default::default()
        }
    }
}
pub(crate) struct ChatComposer {
textarea: TextArea,
textarea_state: RefCell<TextAreaState>,
@ -299,10 +348,14 @@ pub(crate) struct ChatComposer {
pending_pastes: Vec<(String, String)>,
large_paste_counters: HashMap<usize, usize>,
has_focus: bool,
frame_requester: Option<FrameRequester>,
/// Invariant: attached images are labeled in vec order as
/// `[Image #M+1]..[Image #N]`, where `M` is the number of remote images.
attached_images: Vec<AttachedImage>,
placeholder_text: String,
voice_state: VoiceState,
// Spinner control flags keyed by placeholder id; set to true to stop.
spinner_stop_flags: HashMap<String, Arc<AtomicBool>>,
is_task_running: bool,
/// When false, the composer is temporarily read-only (e.g. during sandbox setup).
input_enabled: bool,
@ -320,6 +373,9 @@ pub(crate) struct ChatComposer {
selected_remote_image_index: Option<usize>,
footer_flash: Option<FooterFlash>,
context_window_percent: Option<i64>,
// Monotonically increasing identifier for textarea elements we insert.
#[cfg(not(target_os = "linux"))]
next_element_id: u64,
context_window_used_tokens: Option<i64>,
skills: Option<Vec<SkillMetadata>>,
connectors_snapshot: Option<ConnectorsSnapshot>,
@ -407,8 +463,11 @@ impl ChatComposer {
pending_pastes: Vec::new(),
large_paste_counters: HashMap::new(),
has_focus: has_input_focus,
frame_requester: None,
attached_images: Vec::new(),
placeholder_text,
voice_state: VoiceState::new(enhanced_keys_supported),
spinner_stop_flags: HashMap::new(),
is_task_running: false,
input_enabled: true,
input_disabled_placeholder: None,
@ -421,6 +480,8 @@ impl ChatComposer {
selected_remote_image_index: None,
footer_flash: None,
context_window_percent: None,
#[cfg(not(target_os = "linux"))]
next_element_id: 0,
context_window_used_tokens: None,
skills: None,
connectors_snapshot: None,
@ -442,6 +503,17 @@ impl ChatComposer {
this
}
#[cfg(not(target_os = "linux"))]
/// Mint a fresh textarea-element id. Ids are a monotonically increasing
/// counter rendered as a decimal string, wrapping on overflow.
fn next_id(&mut self) -> String {
    let assigned = self.next_element_id;
    self.next_element_id = assigned.wrapping_add(1);
    format!("{assigned}")
}
/// Store a frame requester so background timers (e.g. the space-hold timeout)
/// can schedule a redraw when they fire.
pub(crate) fn set_frame_requester(&mut self, frame_requester: FrameRequester) {
    self.frame_requester = Some(frame_requester);
}
pub fn set_skill_mentions(&mut self, skills: Option<Vec<SkillMetadata>>) {
self.skills = skills;
}
@ -505,6 +577,23 @@ impl ChatComposer {
/// Toggle availability of the personality command in the composer.
pub fn set_personality_command_enabled(&mut self, enabled: bool) {
    self.personality_command_enabled = enabled;
}
/// Enable or disable voice transcription for this composer.
///
/// Disabling also cancels any pending spacebar hold: the timer state is
/// dropped and the temporary hold element is turned back into a plain space.
pub fn set_voice_transcription_enabled(&mut self, enabled: bool) {
    self.voice_state.transcription_enabled = enabled;
    if enabled {
        return;
    }
    self.voice_state.space_hold_started_at = None;
    if let Some(id) = self.voice_state.space_hold_element_id.take() {
        let _ = self.textarea.replace_element_by_id(&id, " ");
    }
    self.voice_state.space_hold_trigger = None;
    self.voice_state.space_hold_repeat_seen = false;
}
#[cfg(not(target_os = "linux"))]
/// Whether hold-to-talk is currently enabled.
///
/// This accessor is only compiled on non-Linux targets, so the former
/// `&& cfg!(not(target_os = "linux"))` conjunct was a compile-time constant
/// `true`; the runtime feature flag alone decides.
fn voice_transcription_enabled(&self) -> bool {
    self.voice_state.transcription_enabled
}
/// Centralized feature gating keeps config checks out of call sites.
fn popups_enabled(&self) -> bool {
self.config.popups_enabled
@ -568,6 +657,20 @@ impl ChatComposer {
}
}
/// Compute the terminal cursor position for the composer within `area`, or
/// `None` when the cursor should be hidden.
pub fn cursor_pos(&self, area: Rect) -> Option<(u16, u16)> {
    // No cursor while the composer is read-only.
    if !self.input_enabled {
        return None;
    }
    // Hide the cursor while recording voice input.
    #[cfg(not(target_os = "linux"))]
    if self.voice_state.voice.is_some() {
        return None;
    }
    // Third layout slot is the textarea region; delegate to the textarea with
    // its current scroll/cursor state.
    let [_, _, textarea_rect, _] = self.layout_areas(area);
    let state = *self.textarea_state.borrow();
    self.textarea.cursor_pos_with_state(textarea_rect, state)
}
/// Returns true if the composer currently contains no user-entered input.
pub(crate) fn is_empty(&self) -> bool {
self.textarea.is_empty()
@ -621,6 +724,10 @@ impl ChatComposer {
/// In all cases, clears any paste-burst Enter suppression state so a real paste cannot affect
/// the next user Enter key, then syncs popup state.
pub fn handle_paste(&mut self, pasted: String) -> bool {
#[cfg(not(target_os = "linux"))]
if self.voice_state.voice.is_some() {
return false;
}
let pasted = pasted.replace("\r\n", "\n").replace('\r', "\n");
let char_count = pasted.chars().count();
if char_count > LARGE_PASTE_CHAR_THRESHOLD {
@ -633,9 +740,8 @@ impl ChatComposer {
{
self.textarea.insert_str(" ");
} else {
self.textarea.insert_str(&pasted);
self.insert_str(&pasted);
}
// Explicit paste events should not trigger Enter suppression.
self.paste_burst.clear_after_explicit_paste();
self.sync_popups();
true
@ -866,6 +972,9 @@ impl ChatComposer {
local_image_paths: Vec<PathBuf>,
mention_bindings: Vec<MentionBinding>,
) {
#[cfg(not(target_os = "linux"))]
self.stop_all_transcription_spinners();
// Clear any existing content, placeholders, and attachments first.
self.textarea.set_text_clearing_elements("");
self.pending_pastes.clear();
@ -1124,20 +1233,56 @@ impl ChatComposer {
/// Handle a key event coming from the main UI.
///
/// Returns the input result plus whether the UI needs a redraw. Dispatch
/// order matters: recording intercepts everything, then the read-only gate,
/// then release filtering, then hold-to-talk, then the popups/textarea.
pub fn handle_key_event(&mut self, key_event: KeyEvent) -> (InputResult, bool) {
    // Seeing any Release event proves the terminal reports key releases.
    if matches!(key_event.kind, KeyEventKind::Release) {
        self.voice_state.key_release_supported = true;
    }
    // Timer-based conversion is handled in the pre-draw tick.
    // If recording, stop on Space release when supported. On terminals without key-release
    // events, Space repeat events are handled as "still held" and stop is driven by timeout
    // in `process_space_hold_trigger`.
    if let Some(result) = self.handle_key_event_while_recording(key_event) {
        return result;
    }
    if !self.input_enabled {
        return (InputResult::None, false);
    }
    // Outside of recording, ignore all key releases globally except for Space,
    // which is handled explicitly for hold-to-talk behavior below.
    if matches!(key_event.kind, KeyEventKind::Release)
        && !matches!(key_event.code, KeyCode::Char(' '))
    {
        return (InputResult::None, false);
    }
    // If a space hold is pending and another non-space key is pressed, cancel the hold
    // and convert the element into a plain space.
    if self.voice_state.space_hold_started_at.is_some()
        && !matches!(key_event.code, KeyCode::Char(' '))
    {
        self.voice_state.space_hold_started_at = None;
        if let Some(id) = self.voice_state.space_hold_element_id.take() {
            let _ = self.textarea.replace_element_by_id(&id, " ");
        }
        self.voice_state.space_hold_trigger = None;
        self.voice_state.space_hold_repeat_seen = false;
        // fall through to normal handling of this other key
    }
    if let Some(result) = self.handle_voice_space_key_event(&key_event) {
        return result;
    }
    let result = match &mut self.active_popup {
        ActivePopup::Command(_) => self.handle_key_event_with_slash_popup(key_event),
        ActivePopup::File(_) => self.handle_key_event_with_file_popup(key_event),
        ActivePopup::Skill(_) => self.handle_key_event_with_skill_popup(key_event),
        ActivePopup::None => self.handle_key_event_without_popup(key_event),
    };
    // Update (or hide/show) popup after processing the key.
    self.sync_popups();
    result
}
@ -2535,6 +2680,7 @@ impl ChatComposer {
// -------------------------------------------------------------
KeyEvent {
code: KeyCode::Up | KeyCode::Down,
kind: KeyEventKind::Press | KeyEventKind::Repeat,
..
}
| KeyEvent {
@ -2588,6 +2734,136 @@ impl ChatComposer {
}
}
#[cfg(target_os = "linux")]
/// Voice capture is not compiled on Linux (cpal/hound are gated out), so
/// Space is never intercepted; always fall through to normal key handling.
fn handle_voice_space_key_event(
    &mut self,
    _key_event: &KeyEvent,
) -> Option<(InputResult, bool)> {
    None
}
#[cfg(not(target_os = "linux"))]
/// Intercept Space events for hold-to-talk when the feature is enabled.
///
/// Returns `Some((result, needs_redraw))` when the event was consumed, or
/// `None` to fall through to normal key handling.
fn handle_voice_space_key_event(
    &mut self,
    key_event: &KeyEvent,
) -> Option<(InputResult, bool)> {
    if !self.voice_transcription_enabled() || !matches!(key_event.code, KeyCode::Char(' ')) {
        return None;
    }
    match key_event.kind {
        KeyEventKind::Press => {
            // During a paste burst, Space is pasted text, not a hold.
            if self.paste_burst.is_active() {
                return None;
            }
            // If textarea is empty, start recording immediately without inserting a space.
            if self.textarea.text().is_empty() {
                if self.start_recording_with_placeholder() {
                    return Some((InputResult::None, true));
                }
                return None;
            }
            // If a hold is already pending, swallow further press events to
            // avoid inserting multiple spaces and resetting the timer on key repeat.
            if self.voice_state.space_hold_started_at.is_some() {
                if !self.voice_state.key_release_supported {
                    self.voice_state.space_hold_repeat_seen = true;
                }
                return Some((InputResult::None, false));
            }
            // Insert a named element that renders as a space so we can later
            // remove it on timeout or convert it to a plain space on release.
            let elem_id = self.next_id();
            self.textarea.insert_named_element(" ", elem_id.clone());
            // Record pending hold metadata.
            self.voice_state.space_hold_started_at = Some(Instant::now());
            self.voice_state.space_hold_element_id = Some(elem_id);
            self.voice_state.space_hold_repeat_seen = false;
            // Spawn a delayed task to flip an atomic flag; we check it on next key event.
            let flag = Arc::new(AtomicBool::new(false));
            let frame = self.frame_requester.clone();
            Self::schedule_space_hold_timer(flag.clone(), frame);
            self.voice_state.space_hold_trigger = Some(flag);
            Some((InputResult::None, true))
        }
        // If we see a repeat before release, handling occurs in the top-level pending block.
        KeyEventKind::Repeat => {
            // Swallow repeats while a hold is pending to avoid extra spaces.
            if self.voice_state.space_hold_started_at.is_some() {
                if !self.voice_state.key_release_supported {
                    self.voice_state.space_hold_repeat_seen = true;
                }
                return Some((InputResult::None, false));
            }
            // Fallback: if no pending hold, treat as normal input.
            None
        }
        // Space release without pending (fallback): treat as normal input.
        KeyEventKind::Release => {
            // If a hold is pending, convert the element to a plain space and clear state.
            self.voice_state.space_hold_started_at = None;
            if let Some(id) = self.voice_state.space_hold_element_id.take() {
                let _ = self.textarea.replace_element_by_id(&id, " ");
            }
            self.voice_state.space_hold_trigger = None;
            self.voice_state.space_hold_repeat_seen = false;
            Some((InputResult::None, true))
        }
    }
}
#[cfg(target_os = "linux")]
/// No recording state exists on Linux; never intercept keys here.
fn handle_key_event_while_recording(
    &mut self,
    _key_event: KeyEvent,
) -> Option<(InputResult, bool)> {
    None
}
#[cfg(not(target_os = "linux"))]
/// While a recording is active, decide per key event whether to stop it.
///
/// With release reporting: stop on Space release, or on any non-Space
/// press/repeat. Without it: Space repeats are "still held" keepalives; any
/// other press/repeat stops, and idle timeout is handled in the pre-draw tick.
/// Returns `None` when no recording is active.
fn handle_key_event_while_recording(
    &mut self,
    key_event: KeyEvent,
) -> Option<(InputResult, bool)> {
    if self.voice_state.voice.is_some() {
        let should_stop = if self.voice_state.key_release_supported {
            match key_event.kind {
                KeyEventKind::Release => matches!(key_event.code, KeyCode::Char(' ')),
                KeyEventKind::Press | KeyEventKind::Repeat => {
                    !matches!(key_event.code, KeyCode::Char(' '))
                }
            }
        } else {
            match key_event.kind {
                KeyEventKind::Release => matches!(key_event.code, KeyCode::Char(' ')),
                KeyEventKind::Press | KeyEventKind::Repeat => {
                    if matches!(key_event.code, KeyCode::Char(' ')) {
                        // Repeated Space keeps the recording alive.
                        self.voice_state.space_recording_last_repeat_at = Some(Instant::now());
                        false
                    } else {
                        true
                    }
                }
            }
        };
        if should_stop {
            let needs_redraw = self.stop_recording_and_start_transcription();
            return Some((InputResult::None, needs_redraw));
        }
        // Swallow non-stopping keys while recording.
        return Some((InputResult::None, false));
    }
    None
}
/// True when the composer content, after leading whitespace, starts with '!'
/// (the shell-command prefix).
fn is_bang_shell_command(&self) -> bool {
    let content = self.textarea.text();
    content.trim_start().starts_with('!')
}
@ -2607,8 +2883,6 @@ impl ChatComposer {
true
}
FlushResult::Typed(ch) => {
// Mirror insert_str() behavior so popups stay in sync when a
// pending fast char flushes as normal typed input.
self.textarea.insert_str(ch.to_string().as_str());
self.sync_popups();
true
@ -2632,6 +2906,12 @@ impl ChatComposer {
/// otherwise `clear_window_after_non_char()` can leave buffered text waiting without a
/// timestamp to time out against.
fn handle_input_basic(&mut self, input: KeyEvent) -> (InputResult, bool) {
    // Only Press/Repeat events constitute input; releases are dropped so the
    // paste-burst logic never appends the same character twice.
    match input.kind {
        KeyEventKind::Press | KeyEventKind::Repeat => {
            self.handle_input_basic_with_time(input, Instant::now())
        }
        _ => (InputResult::None, false),
    }
}
@ -2897,7 +3177,7 @@ impl ChatComposer {
.map(|items| if items.is_empty() { 0 } else { 1 })
}
fn sync_popups(&mut self) {
pub(crate) fn sync_popups(&mut self) {
self.sync_slash_command_elements();
if !self.popups_enabled() {
self.active_popup = ActivePopup::None;
@ -3311,6 +3591,11 @@ impl ChatComposer {
self.has_focus = has_focus;
}
#[cfg(not(target_os = "linux"))]
/// True while a voice capture session is in progress.
pub(crate) fn is_recording(&self) -> bool {
    self.voice_state.voice.is_some()
}
#[allow(dead_code)]
pub(crate) fn set_input_enabled(&mut self, enabled: bool, placeholder: Option<String>) {
self.input_enabled = enabled;
@ -3344,6 +3629,32 @@ impl ChatComposer {
}
}
#[cfg(not(target_os = "linux"))]
/// Arrange for `flag` to be set (and a frame scheduled) after the hold delay,
/// promoting a pending Space hold into hold-to-talk.
fn schedule_space_hold_timer(flag: Arc<AtomicBool>, frame: Option<FrameRequester>) {
    // Delay before a held Space stops being a "maybe typed space".
    const HOLD_DELAY_MILLIS: u64 = 500;
    let delay = Duration::from_millis(HOLD_DELAY_MILLIS);
    // Prefer the ambient tokio runtime; fall back to a dedicated OS thread so
    // the timer still fires outside of a runtime context.
    match Handle::try_current() {
        Ok(handle) => {
            handle.spawn(async move {
                tokio::time::sleep(delay).await;
                Self::complete_space_hold_timer(flag, frame);
            });
        }
        Err(_) => {
            thread::spawn(move || {
                thread::sleep(delay);
                Self::complete_space_hold_timer(flag, frame);
            });
        }
    }
}
#[cfg(not(target_os = "linux"))]
/// Timer completion: latch the flag that `process_space_hold_trigger` polls,
/// then request a redraw so that tick runs promptly.
fn complete_space_hold_timer(flag: Arc<AtomicBool>, frame: Option<FrameRequester>) {
    flag.store(true, Ordering::Relaxed);
    if let Some(requester) = frame {
        requester.schedule_frame();
    }
}
pub(crate) fn set_status_line(&mut self, status_line: Option<Line<'static>>) -> bool {
if self.status_line_value == status_line {
return false;
@ -3361,6 +3672,280 @@ impl ChatComposer {
}
}
#[cfg(not(target_os = "linux"))]
impl ChatComposer {
pub(crate) fn process_space_hold_trigger(&mut self) {
if self.voice_transcription_enabled()
&& let Some(flag) = self.voice_state.space_hold_trigger.as_ref()
&& flag.load(Ordering::Relaxed)
&& self.voice_state.space_hold_started_at.is_some()
&& self.voice_state.voice.is_none()
{
let _ = self.on_space_hold_timeout();
}
const SPACE_REPEAT_INITIAL_GRACE_MILLIS: u64 = 700;
const SPACE_REPEAT_IDLE_TIMEOUT_MILLIS: u64 = 250;
if !self.voice_state.key_release_supported && self.voice_state.voice.is_some() {
let now = Instant::now();
let initial_grace = Duration::from_millis(SPACE_REPEAT_INITIAL_GRACE_MILLIS);
let repeat_idle_timeout = Duration::from_millis(SPACE_REPEAT_IDLE_TIMEOUT_MILLIS);
if let Some(started_at) = self.voice_state.space_recording_started_at
&& now.saturating_duration_since(started_at) >= initial_grace
{
let should_stop = match self.voice_state.space_recording_last_repeat_at {
Some(last_repeat_at) => {
now.saturating_duration_since(last_repeat_at) >= repeat_idle_timeout
}
None => true,
};
if should_stop {
let _ = self.stop_recording_and_start_transcription();
}
}
}
}
/// Called when the 500ms space hold timeout elapses.
///
/// On terminals without key-release reporting, this only transitions into voice capture if we
/// observed repeated Space events while pending; otherwise the keypress is treated as a typed
/// space.
pub(crate) fn on_space_hold_timeout(&mut self) -> bool {
if !self.voice_transcription_enabled() {
return false;
}
if self.voice_state.voice.is_some() {
return false;
}
if self.voice_state.space_hold_started_at.is_some() {
if !self.voice_state.key_release_supported && !self.voice_state.space_hold_repeat_seen {
if let Some(id) = self.voice_state.space_hold_element_id.take() {
let _ = self.textarea.replace_element_by_id(&id, " ");
}
self.voice_state.space_hold_started_at = None;
self.voice_state.space_hold_trigger = None;
self.voice_state.space_hold_repeat_seen = false;
return true;
}
// Preserve the typed space when transitioning into voice capture, but
// avoid duplicating an existing trailing space. In either case,
// convert/remove the temporary named element before inserting the
// recording/transcribing placeholder.
if let Some(id) = self.voice_state.space_hold_element_id.take() {
let replacement = if self
.textarea
.named_element_range(&id)
.and_then(|range| self.textarea.text()[..range.start].chars().next_back())
.is_some_and(|ch| ch == ' ')
{
""
} else {
" "
};
let _ = self.textarea.replace_element_by_id(&id, replacement);
}
// Clear pending state before starting capture
self.voice_state.space_hold_started_at = None;
self.voice_state.space_hold_trigger = None;
self.voice_state.space_hold_repeat_seen = false;
// Start voice capture
self.start_recording_with_placeholder()
} else {
false
}
}
/// Stop recording if active, update the placeholder, and spawn background transcription.
/// Returns true if the UI should redraw.
fn stop_recording_and_start_transcription(&mut self) -> bool {
let Some(vc) = self.voice_state.voice.take() else {
return false;
};
self.voice_state.space_recording_started_at = None;
self.voice_state.space_recording_last_repeat_at = None;
match vc.stop() {
Ok(audio) => {
// If the recording is too short, remove the placeholder immediately
// and skip the transcribing state entirely.
let total_samples = audio.data.len() as f32;
let samples_per_second = (audio.sample_rate as f32) * (audio.channels as f32);
let duration_seconds = if samples_per_second > 0.0 {
total_samples / samples_per_second
} else {
0.0
};
const MIN_DURATION_SECONDS: f32 = 1.0;
if duration_seconds < MIN_DURATION_SECONDS {
if let Some(id) = self.voice_state.recording_placeholder_id.take() {
let _ = self.textarea.replace_element_by_id(&id, "");
}
return true;
}
// Otherwise, update the placeholder to show a spinner and proceed.
let id = match self.voice_state.recording_placeholder_id.take() {
Some(id) => id,
None => self.next_id(),
};
let placeholder_range = self.textarea.named_element_range(&id);
let prompt_source = if let Some(range) = &placeholder_range {
self.textarea.text()[..range.start].to_string()
} else {
self.textarea.text().to_string()
};
// Initialize with first spinner frame immediately.
let _ = self.textarea.update_named_element_by_id(&id, "");
// Spawn animated braille spinner until transcription finishes (or times out).
self.spawn_transcribing_spinner(id.clone());
let tx = self.app_event_tx.clone();
crate::voice::transcribe_async(id, audio, Some(prompt_source), tx);
true
}
Err(e) => {
tracing::error!("failed to stop voice capture: {e}");
true
}
}
}
/// Start voice capture and insert a placeholder element for the live meter.
/// Returns true if recording began and UI should redraw; false on failure.
fn start_recording_with_placeholder(&mut self) -> bool {
match crate::voice::VoiceCapture::start() {
Ok(vc) => {
self.voice_state.voice = Some(vc);
if self.voice_state.key_release_supported {
self.voice_state.space_recording_started_at = None;
} else {
self.voice_state.space_recording_started_at = Some(Instant::now());
}
self.voice_state.space_recording_last_repeat_at = None;
// Insert visible placeholder for the meter (no label)
let id = self.next_id();
self.textarea.insert_named_element("", id.clone());
self.voice_state.recording_placeholder_id = Some(id);
// Spawn metering animation
if let Some(v) = &self.voice_state.voice {
let data = v.data_arc();
let stop = v.stopped_flag();
let sr = v.sample_rate();
let ch = v.channels();
let peak = v.last_peak_arc();
if let Some(idref) = &self.voice_state.recording_placeholder_id {
self.spawn_recording_meter(idref.clone(), sr, ch, data, peak, stop);
}
}
true
}
Err(e) => {
self.voice_state.space_recording_started_at = None;
self.voice_state.space_recording_last_repeat_at = None;
tracing::error!("failed to start voice capture: {e}");
false
}
}
}
fn spawn_recording_meter(
&self,
id: String,
_sample_rate: u32,
_channels: u16,
_data: Arc<Mutex<Vec<i16>>>,
last_peak: Arc<std::sync::atomic::AtomicU16>,
stop: Arc<std::sync::atomic::AtomicBool>,
) {
let tx = self.app_event_tx.clone();
let task = move || {
use std::time::Duration;
let mut meter = crate::voice::RecordingMeterState::new();
loop {
if stop.load(Ordering::Relaxed) {
break;
}
let text = meter.next_text(last_peak.load(Ordering::Relaxed));
tx.send(crate::app_event::AppEvent::UpdateRecordingMeter {
id: id.clone(),
text,
});
thread::sleep(Duration::from_millis(100));
}
};
if let Ok(handle) = Handle::try_current() {
handle.spawn_blocking(task);
} else {
thread::spawn(task);
}
}
/// Starts a spinner animation on the placeholder element `id` while
/// transcription is in flight.
///
/// Any previous spinner for the same id is stopped first, and the new stop
/// flag is registered in `spinner_stop_flags` so it can be cancelled later
/// by `stop_transcription_spinner` / `stop_all_transcription_spinners`.
fn spawn_transcribing_spinner(&mut self, id: String) {
    self.stop_transcription_spinner(&id);
    let stop = Arc::new(AtomicBool::new(false));
    self.spinner_stop_flags
        .insert(id.clone(), Arc::clone(&stop));
    let tx = self.app_event_tx.clone();
    let task = move || {
        use std::time::Duration;
        // Spinner frames cycled at 10fps.
        let frames: Vec<&'static str> = vec!["", "", "", "", "", "", "", "", "", ""];
        let mut i: usize = 0;
        // Safety stop after ~60s to avoid a runaway task if events are lost.
        let max_ticks = 600usize; // 600 * 100ms = 60s
        for _ in 0..max_ticks {
            if stop.load(Ordering::Relaxed) {
                break;
            }
            let text = frames[i % frames.len()].to_string();
            tx.send(crate::app_event::AppEvent::UpdateRecordingMeter {
                id: id.clone(),
                text,
            });
            i = i.wrapping_add(1);
            thread::sleep(Duration::from_millis(100));
        }
    };
    // Use the tokio blocking pool when available; otherwise a bare thread.
    if let Ok(handle) = Handle::try_current() {
        handle.spawn_blocking(task);
    } else {
        thread::spawn(task);
    }
}
/// Signals the spinner task for `id` (if any) to exit and drops its flag
/// from the registry.
fn stop_transcription_spinner(&mut self, id: &str) {
    if let Some(flag) = self.spinner_stop_flags.remove(id) {
        flag.store(true, Ordering::Relaxed);
    }
}
/// Signals every outstanding spinner task to exit and empties the registry.
fn stop_all_transcription_spinners(&mut self) {
    self.spinner_stop_flags
        .drain()
        .for_each(|(_, flag)| flag.store(true, Ordering::Relaxed));
}
/// Replaces the voice placeholder `id` with the final transcription `text`,
/// stopping its spinner first. The element entry is removed, so the new
/// text becomes plain, editable composer content.
pub fn replace_transcription(&mut self, id: &str, text: &str) {
    self.stop_transcription_spinner(id);
    let _ = self.textarea.replace_element_by_id(id, text);
}
/// Updates the placeholder's text in place while keeping its element id, so
/// later events (e.g. the final transcript) can still address it.
/// Returns `false` when no element with `id` exists.
pub fn update_transcription_in_place(&mut self, id: &str, text: &str) -> bool {
    self.textarea.update_named_element_by_id(id, text)
}
/// Removes the voice placeholder `id` entirely (used when transcription
/// fails or is cancelled), stopping its spinner first.
pub fn remove_transcription_placeholder(&mut self, id: &str) {
    self.stop_transcription_spinner(id);
    let _ = self.textarea.replace_element_by_id(id, "");
}
}
fn skill_display_name(skill: &SkillMetadata) -> &str {
skill
.interface
@ -3787,6 +4372,15 @@ fn prompt_selection_action(
}
}
impl Drop for ChatComposer {
fn drop(&mut self) {
// Stop any running spinner tasks.
for (_id, flag) in self.spinner_stop_flags.drain() {
flag.store(true, Ordering::Relaxed);
}
}
}
#[cfg(test)]
mod tests {
use super::*;
@ -5527,11 +6121,19 @@ mod tests {
/// Types `chars` one at a time with the paste-burst flush delay between
/// keys, mimicking human typing. For a space, also sends a key Release so
/// the composer's hold-to-talk handling sees a full press/release cycle.
fn type_chars_humanlike(composer: &mut ChatComposer, chars: &[char]) {
    use crossterm::event::KeyCode;
    use crossterm::event::KeyEvent;
    use crossterm::event::KeyEventKind;
    use crossterm::event::KeyModifiers;
    for &ch in chars {
        let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char(ch), KeyModifiers::NONE));
        std::thread::sleep(ChatComposer::recommended_paste_flush_delay());
        let _ = composer.flush_paste_burst_if_due();
        if ch == ' ' {
            let _ = composer.handle_key_event(KeyEvent::new_with_kind(
                KeyCode::Char(' '),
                KeyModifiers::NONE,
                KeyEventKind::Release,
            ));
        }
    }
}
@ -5621,6 +6223,195 @@ mod tests {
assert!(found_error, "expected error history cell to be sent");
}
// With voice transcription disabled, a space press/release must behave as
// ordinary text input and leave no hold-to-talk state behind.
#[test]
fn voice_transcription_disabled_treats_space_as_normal_input() {
    use crossterm::event::KeyCode;
    use crossterm::event::KeyEvent;
    use crossterm::event::KeyEventKind;
    use crossterm::event::KeyModifiers;
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let sender = AppEventSender::new(tx);
    let mut composer = ChatComposer::new(
        true,
        sender,
        false,
        "Ask Codex to do anything".to_string(),
        true,
    );
    composer.set_text_content("x".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();
    let _ = composer.handle_key_event(KeyEvent::new(KeyCode::Char(' '), KeyModifiers::NONE));
    let _ = composer.handle_key_event(KeyEvent::new_with_kind(
        KeyCode::Char(' '),
        KeyModifiers::NONE,
        KeyEventKind::Release,
    ));
    // The space is inserted literally...
    assert_eq!("x ", composer.textarea.text());
    // ...and no hold-to-talk bookkeeping was started.
    assert!(composer.voice_state.space_hold_started_at.is_none());
    assert!(composer.voice_state.space_hold_element_id.is_none());
    assert!(composer.voice_state.space_hold_trigger.is_none());
    assert!(!composer.voice_state.space_hold_repeat_seen);
}
// Terminal cannot report key releases and the hold timer fired without any
// key repeat: treat the press as a normal tap and keep the typed space.
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_without_release_or_repeat_keeps_typed_space() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let sender = AppEventSender::new(tx);
    let mut composer = ChatComposer::new(
        true,
        sender,
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_voice_transcription_enabled(true);
    composer.set_text_content("x".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();
    // Simulate the state the composer enters on space-down.
    let elem_id = "space-hold".to_string();
    composer.textarea.insert_named_element(" ", elem_id.clone());
    composer.voice_state.space_hold_started_at = Some(Instant::now());
    composer.voice_state.space_hold_element_id = Some(elem_id);
    composer.voice_state.space_hold_trigger = Some(Arc::new(AtomicBool::new(true)));
    composer.voice_state.key_release_supported = false;
    composer.voice_state.space_hold_repeat_seen = false;
    assert_eq!("x ", composer.textarea.text());
    composer.process_space_hold_trigger();
    // The typed space remains and the hold state is cleared.
    assert_eq!("x ", composer.textarea.text());
    assert!(composer.voice_state.space_hold_started_at.is_none());
    assert!(!composer.voice_state.space_hold_repeat_seen);
}
// A key repeat was observed before the timer fired (space physically held
// down): the timeout should take the hold-to-talk path even though no
// Release event will ever arrive.
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_with_repeat_uses_hold_path_without_release() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let sender = AppEventSender::new(tx);
    let mut composer = ChatComposer::new(
        true,
        sender,
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_voice_transcription_enabled(true);
    composer.set_text_content("x".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();
    // Simulate the state the composer enters on space-down, with a repeat seen.
    let elem_id = "space-hold".to_string();
    composer.textarea.insert_named_element(" ", elem_id.clone());
    composer.voice_state.space_hold_started_at = Some(Instant::now());
    composer.voice_state.space_hold_element_id = Some(elem_id);
    composer.voice_state.space_hold_trigger = Some(Arc::new(AtomicBool::new(true)));
    composer.voice_state.key_release_supported = false;
    composer.voice_state.space_hold_repeat_seen = true;
    composer.process_space_hold_trigger();
    assert_eq!("x ", composer.textarea.text());
    assert!(composer.voice_state.space_hold_started_at.is_none());
    assert!(!composer.voice_state.space_hold_repeat_seen);
    // Recording may genuinely start if the machine has a mic; clean up.
    if composer.is_recording() {
        let _ = composer.stop_recording_and_start_transcription();
    }
}
// Hold path with the draft already ending in a space: the placeholder space
// must be collapsed rather than producing "x  " (double space).
#[cfg(not(target_os = "linux"))]
#[test]
fn space_hold_timeout_with_repeat_does_not_duplicate_existing_space() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let sender = AppEventSender::new(tx);
    let mut composer = ChatComposer::new(
        true,
        sender,
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    composer.set_voice_transcription_enabled(true);
    // Draft already ends with a space before the hold begins.
    composer.set_text_content("x ".to_string(), Vec::new(), Vec::new());
    composer.move_cursor_to_end();
    let elem_id = "space-hold".to_string();
    composer.textarea.insert_named_element(" ", elem_id.clone());
    composer.voice_state.space_hold_started_at = Some(Instant::now());
    composer.voice_state.space_hold_element_id = Some(elem_id);
    composer.voice_state.space_hold_trigger = Some(Arc::new(AtomicBool::new(true)));
    composer.voice_state.key_release_supported = false;
    composer.voice_state.space_hold_repeat_seen = true;
    composer.process_space_hold_trigger();
    assert_eq!("x ", composer.textarea.text());
    assert!(composer.voice_state.space_hold_started_at.is_none());
    assert!(!composer.voice_state.space_hold_repeat_seen);
    // Recording may genuinely start if the machine has a mic; clean up.
    if composer.is_recording() {
        let _ = composer.stop_recording_and_start_transcription();
    }
}
// replace_transcription must set the placeholder's spinner stop flag, drop
// it from the registry, and swap the placeholder text for the final text.
#[cfg(not(target_os = "linux"))]
#[test]
fn replace_transcription_stops_spinner_for_placeholder() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let sender = AppEventSender::new(tx);
    let mut composer = ChatComposer::new(
        true,
        sender,
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    let id = "voice-placeholder".to_string();
    composer.textarea.insert_named_element("", id.clone());
    let flag = Arc::new(AtomicBool::new(false));
    composer
        .spinner_stop_flags
        .insert(id.clone(), Arc::clone(&flag));
    composer.replace_transcription(&id, "transcribed text");
    assert!(flag.load(Ordering::Relaxed));
    assert!(!composer.spinner_stop_flags.contains_key(&id));
    assert_eq!(composer.textarea.text(), "transcribed text");
}
// Replacing the composer draft wholesale must cancel every registered
// transcription spinner, not just one.
#[cfg(not(target_os = "linux"))]
#[test]
fn set_text_content_stops_all_transcription_spinners() {
    let (tx, _rx) = unbounded_channel::<AppEvent>();
    let sender = AppEventSender::new(tx);
    let mut composer = ChatComposer::new(
        true,
        sender,
        false,
        "Ask Codex to do anything".to_string(),
        false,
    );
    let flag_one = Arc::new(AtomicBool::new(false));
    let flag_two = Arc::new(AtomicBool::new(false));
    composer
        .spinner_stop_flags
        .insert("voice-1".to_string(), Arc::clone(&flag_one));
    composer
        .spinner_stop_flags
        .insert("voice-2".to_string(), Arc::clone(&flag_two));
    composer.set_text_content("draft".to_string(), Vec::new(), Vec::new());
    assert!(flag_one.load(Ordering::Relaxed));
    assert!(flag_two.load(Ordering::Relaxed));
    assert!(composer.spinner_stop_flags.is_empty());
}
#[test]
fn extract_args_supports_quoted_paths_single_arg() {
let args = extract_positional_args_for_prompt_line(

View file

@ -33,6 +33,7 @@ use codex_protocol::request_user_input::RequestUserInputEvent;
use codex_protocol::user_input::TextElement;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyEventKind;
use ratatui::buffer::Buffer;
use ratatui::layout::Rect;
use ratatui::text::Line;
@ -204,8 +205,8 @@ impl BottomPane {
placeholder_text,
disable_paste_burst,
);
composer.set_frame_requester(frame_requester.clone());
composer.set_skill_mentions(skills);
Self {
composer,
view_stack: Vec::new(),
@ -291,6 +292,11 @@ impl BottomPane {
self.request_redraw();
}
/// Toggles hold-space voice transcription in the composer and repaints.
pub fn set_voice_transcription_enabled(&mut self, enabled: bool) {
    self.composer.set_voice_transcription_enabled(enabled);
    self.request_redraw();
}
/// Update the key hint shown next to queued messages so it matches the
/// binding that `ChatWidget` actually listens for.
pub(crate) fn set_queued_message_edit_binding(&mut self, binding: KeyBinding) {
@ -327,8 +333,23 @@ impl BottomPane {
/// Forward a key event to the active view or the composer.
pub fn handle_key_event(&mut self, key_event: KeyEvent) -> InputResult {
// Do not globally intercept space; only composer handles hold-to-talk.
// While recording, route all keys to the composer so it can stop on release or next key.
#[cfg(not(target_os = "linux"))]
if self.composer.is_recording() {
let (_ir, needs_redraw) = self.composer.handle_key_event(key_event);
if needs_redraw {
self.request_redraw();
}
return InputResult::None;
}
// If a modal/view is active, handle it here; otherwise forward to composer.
if !self.view_stack.is_empty() {
if key_event.kind == KeyEventKind::Release {
return InputResult::None;
}
// We need three pieces of information after routing the key:
// whether Esc completed the view, whether the view finished for any
// reason, and whether a paste-burst timer should be scheduled.
@ -432,6 +453,7 @@ impl BottomPane {
}
} else {
let needs_redraw = self.composer.handle_paste(pasted);
self.composer.sync_popups();
if needs_redraw {
self.request_redraw();
}
@ -440,9 +462,18 @@ impl BottomPane {
/// Inserts `text` at the composer cursor, refreshes any open popups, and
/// requests a repaint.
pub(crate) fn insert_str(&mut self, text: &str) {
    self.composer.insert_str(text);
    self.composer.sync_popups();
    self.request_redraw();
}
// Space hold timeout is handled inside ChatComposer via an internal timer.
/// Runs time-based composer transitions just before drawing a frame, then
/// refreshes popups to match the resulting state.
pub(crate) fn pre_draw_tick(&mut self) {
    // Allow composer to process any time-based transitions before drawing
    // (voice input, and so the space-hold trigger, is not built on Linux).
    #[cfg(not(target_os = "linux"))]
    self.composer.process_space_hold_trigger();
    self.composer.sync_popups();
}
/// Replace the composer text with `text`.
///
/// This is intended for fresh input where mention linkage does not need to
@ -895,6 +926,7 @@ impl BottomPane {
.on_history_entry_response(log_id, offset, entry);
if updated {
self.composer.sync_popups();
self.request_redraw();
}
}
@ -973,6 +1005,30 @@ impl BottomPane {
}
}
// Voice-transcription plumbing: thin wrappers that forward placeholder
// updates to the composer and keep popups/redraws in sync. Voice input is
// not built on Linux, hence the cfg gate.
#[cfg(not(target_os = "linux"))]
impl BottomPane {
    /// Replaces placeholder `id` with the final transcription text.
    pub(crate) fn replace_transcription(&mut self, id: &str, text: &str) {
        self.composer.replace_transcription(id, text);
        self.composer.sync_popups();
        self.request_redraw();
    }

    /// Updates placeholder `id` in place; returns whether it existed.
    /// Popups/redraw are only refreshed when something actually changed.
    pub(crate) fn update_transcription_in_place(&mut self, id: &str, text: &str) -> bool {
        let updated = self.composer.update_transcription_in_place(id, text);
        if updated {
            self.composer.sync_popups();
            self.request_redraw();
        }
        updated
    }

    /// Removes placeholder `id` (transcription failed or was cancelled).
    pub(crate) fn remove_transcription_placeholder(&mut self, id: &str) {
        self.composer.remove_transcription_placeholder(id);
        self.composer.sync_popups();
        self.request_redraw();
    }
}
impl Renderable for BottomPane {
fn render(&self, area: Rect, buf: &mut Buffer) {
self.as_renderable().render(area, buf);
@ -993,6 +1049,7 @@ mod tests {
use crate::status_indicator_widget::StatusDetailsCapitalization;
use codex_protocol::protocol::Op;
use codex_protocol::protocol::SkillScope;
use crossterm::event::KeyEventKind;
use crossterm::event::KeyModifiers;
use insta::assert_snapshot;
use ratatui::buffer::Buffer;
@ -1571,4 +1628,58 @@ mod tests {
assert_eq!(on_ctrl_c_calls.get(), 0);
assert_eq!(handle_calls.get(), 1);
}
// A modal view must receive key presses but never Release events; handling
// releases would double-trigger actions in terminals that report them.
#[test]
fn release_events_are_ignored_for_active_view() {
    // Minimal view that just counts how many key events reach it.
    #[derive(Default)]
    struct CountingView {
        handle_calls: Rc<Cell<usize>>,
    }
    impl Renderable for CountingView {
        fn render(&self, _area: Rect, _buf: &mut Buffer) {}
        fn desired_height(&self, _width: u16) -> u16 {
            0
        }
    }
    impl BottomPaneView for CountingView {
        fn handle_key_event(&mut self, _key_event: KeyEvent) {
            self.handle_calls
                .set(self.handle_calls.get().saturating_add(1));
        }
    }
    let (tx_raw, _rx) = unbounded_channel::<AppEvent>();
    let tx = AppEventSender::new(tx_raw);
    let mut pane = BottomPane::new(BottomPaneParams {
        app_event_tx: tx,
        frame_requester: FrameRequester::test_dummy(),
        has_input_focus: true,
        enhanced_keys_supported: false,
        placeholder_text: "Ask Codex to do anything".to_string(),
        disable_paste_burst: false,
        animations_enabled: true,
        skills: Some(Vec::new()),
    });
    let handle_calls = Rc::new(Cell::new(0));
    pane.push_view(Box::new(CountingView {
        handle_calls: Rc::clone(&handle_calls),
    }));
    // The Press reaches the view; the Release is swallowed by the pane.
    pane.handle_key_event(KeyEvent::new_with_kind(
        KeyCode::Down,
        KeyModifiers::NONE,
        KeyEventKind::Press,
    ));
    pane.handle_key_event(KeyEvent::new_with_kind(
        KeyCode::Down,
        KeyModifiers::NONE,
        KeyEventKind::Release,
    ));
    assert_eq!(handle_calls.get(), 1);
}
}

View file

@ -3,6 +3,7 @@ use codex_protocol::user_input::ByteRange;
use codex_protocol::user_input::TextElement as UserTextElement;
use crossterm::event::KeyCode;
use crossterm::event::KeyEvent;
use crossterm::event::KeyEventKind;
use crossterm::event::KeyModifiers;
use ratatui::buffer::Buffer;
use ratatui::layout::Rect;
@ -27,6 +28,7 @@ fn is_word_separator(ch: char) -> bool {
struct TextElement {
id: u64,
range: Range<usize>,
name: Option<String>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
@ -101,6 +103,7 @@ impl TextArea {
self.elements.push(TextElement {
id,
range: start..end,
name: None,
});
}
self.elements.sort_by_key(|e| e.range.start);
@ -256,6 +259,11 @@ impl TextArea {
}
pub fn input(&mut self, event: KeyEvent) {
// Only process key presses or repeats; ignore releases to avoid inserting
// characters on key-up events when modifiers are no longer reported.
if !matches!(event.kind, KeyEventKind::Press | KeyEventKind::Repeat) {
return;
}
match event {
// Some terminals (or configurations) send Control key chords as
// C0 control characters without reporting the CONTROL modifier.
@ -886,6 +894,73 @@ impl TextArea {
id
}
/// Inserts `text` at the cursor as an atomic element tagged with the string
/// `id`, so it can later be looked up and replaced by name (used for voice
/// recording/transcription placeholders).
#[cfg(not(target_os = "linux"))]
pub fn insert_named_element(&mut self, text: &str, id: String) {
    let start = self.clamp_pos_for_insertion(self.cursor_pos);
    self.insert_str_at(start, text);
    let end = start + text.len();
    self.add_element_with_id(start..end, Some(id));
    // Place cursor at end of inserted element
    self.set_cursor(end);
}
/// Replaces the text of the element named `id` and drops its element entry,
/// turning the replacement into plain text. Returns `false` when no element
/// with that name exists.
///
/// NOTE(review): if several elements share a name, only the first one's
/// text is replaced but `retain` removes every entry with that name —
/// confirm names are unique per textarea.
pub fn replace_element_by_id(&mut self, id: &str, text: &str) -> bool {
    if let Some(idx) = self
        .elements
        .iter()
        .position(|e| e.name.as_deref() == Some(id))
    {
        let range = self.elements[idx].range.clone();
        self.replace_range_raw(range, text);
        self.elements.retain(|e| e.name.as_deref() != Some(id));
        true
    } else {
        false
    }
}
/// Update the element's text in place, preserving its id so callers can
/// update it again later (e.g. recording -> transcribing -> final).
/// Returns `false` when no element named `id` exists.
#[allow(dead_code)]
pub fn update_named_element_by_id(&mut self, id: &str, text: &str) -> bool {
    if let Some(elem_idx) = self
        .elements
        .iter()
        .position(|e| e.name.as_deref() == Some(id))
    {
        let old_range = self.elements[elem_idx].range.clone();
        let start = old_range.start;
        self.replace_range_raw(old_range, text);
        // After replace_range_raw, the old element entry was removed if fully overlapped.
        // Re-add an updated element with the same id and new range.
        let new_end = start + text.len();
        self.add_element_with_id(start..new_end, Some(id.to_string()));
        true
    } else {
        false
    }
}
/// Returns the byte range currently occupied by the element named `id`,
/// or `None` when no such element exists.
#[allow(dead_code)]
pub fn named_element_range(&self, id: &str) -> Option<std::ops::Range<usize>> {
    self.elements
        .iter()
        .find_map(|e| (e.name.as_deref() == Some(id)).then(|| e.range.clone()))
}
/// Registers `range` as an atomic element, optionally tagged with a `name`
/// for later lookup by string id. Elements are kept sorted by start offset.
/// Returns the element's numeric id.
fn add_element_with_id(&mut self, range: Range<usize>, name: Option<String>) -> u64 {
    let id = self.next_element_id();
    let elem = TextElement { id, range, name };
    self.elements.push(elem);
    self.elements.sort_by_key(|e| e.range.start);
    id
}
/// Registers an unnamed atomic element covering `range`.
fn add_element(&mut self, range: Range<usize>) -> u64 {
    self.add_element_with_id(range, None)
}
/// Mark an existing text range as an atomic element without changing the text.
///
/// This is used to convert already-typed tokens (like `/plan`) into elements
@ -910,12 +985,7 @@ impl TextArea {
{
return None;
}
let id = self.next_element_id();
self.elements.push(TextElement {
id,
range: start..end,
});
self.elements.sort_by_key(|e| e.range.start);
let id = self.add_element(start..end);
Some(id)
}
@ -931,20 +1001,11 @@ impl TextArea {
len_before != self.elements.len()
}
fn add_element(&mut self, range: Range<usize>) -> u64 {
let id = self.next_element_id();
let elem = TextElement { id, range };
self.elements.push(elem);
self.elements.sort_by_key(|e| e.range.start);
id
}
/// Hands out the next monotonically increasing element id (saturating add,
/// so ids never wrap back to a previously issued value).
fn next_element_id(&mut self) -> u64 {
    let id = self.next_element_id;
    self.next_element_id = self.next_element_id.saturating_add(1);
    id
}
fn find_element_containing(&self, pos: usize) -> Option<usize> {
self.elements
.iter()

View file

@ -2259,6 +2259,10 @@ impl ChatWidget {
);
}
/// Forwarded to the bottom pane before each draw so time-based composer
/// state (e.g. the space-hold timeout) is processed.
pub(crate) fn pre_draw_tick(&mut self) {
    self.bottom_pane.pre_draw_tick();
}
/// Handle completion of an `AgentMessage` turn item.
///
/// Commentary completion sets a deferred restore flag so the status row
@ -2848,6 +2852,9 @@ impl ChatWidget {
widget
.bottom_pane
.set_steer_enabled(widget.config.features.enabled(Feature::Steer));
widget.bottom_pane.set_voice_transcription_enabled(
widget.config.features.enabled(Feature::VoiceTranscription),
);
widget
.bottom_pane
.set_status_line_enabled(!widget.configured_status_line_items().is_empty());
@ -3016,6 +3023,9 @@ impl ChatWidget {
widget
.bottom_pane
.set_steer_enabled(widget.config.features.enabled(Feature::Steer));
widget.bottom_pane.set_voice_transcription_enabled(
widget.config.features.enabled(Feature::VoiceTranscription),
);
widget
.bottom_pane
.set_status_line_enabled(!widget.configured_status_line_items().is_empty());
@ -3173,6 +3183,9 @@ impl ChatWidget {
widget
.bottom_pane
.set_steer_enabled(widget.config.features.enabled(Feature::Steer));
widget.bottom_pane.set_voice_transcription_enabled(
widget.config.features.enabled(Feature::VoiceTranscription),
);
widget
.bottom_pane
.set_status_line_enabled(!widget.configured_status_line_items().is_empty());
@ -6370,6 +6383,9 @@ impl ChatWidget {
if feature == Feature::Steer {
self.bottom_pane.set_steer_enabled(enabled);
}
if feature == Feature::VoiceTranscription {
self.bottom_pane.set_voice_transcription_enabled(enabled);
}
if feature == Feature::Personality {
self.sync_personality_command_enabled();
}
@ -7521,6 +7537,29 @@ impl ChatWidget {
}
}
// Voice-transcription forwarding (voice input is not built on Linux).
#[cfg(not(target_os = "linux"))]
impl ChatWidget {
    /// Replaces placeholder `id` with the final transcription text.
    pub(crate) fn replace_transcription(&mut self, id: &str, text: &str) {
        self.bottom_pane.replace_transcription(id, text);
        // Ensure the UI redraws to reflect the updated transcription.
        self.request_redraw();
    }

    /// Updates placeholder `id` in place; redraws only when it existed.
    pub(crate) fn update_transcription_in_place(&mut self, id: &str, text: &str) -> bool {
        let updated = self.bottom_pane.update_transcription_in_place(id, text);
        if updated {
            self.request_redraw();
        }
        updated
    }

    /// Removes placeholder `id` after a failed or cancelled transcription.
    pub(crate) fn remove_transcription_placeholder(&mut self, id: &str) {
        self.bottom_pane.remove_transcription_placeholder(id);
        // Ensure the UI redraws to reflect placeholder removal.
        self.request_redraw();
    }
}
fn has_websocket_timing_metrics(summary: RuntimeMetricsSummary) -> bool {
summary.responses_api_overhead_ms > 0
|| summary.responses_api_inference_time_ms > 0

View file

@ -114,7 +114,79 @@ pub mod update_action;
mod update_prompt;
mod updates;
mod version;
#[cfg(all(not(target_os = "linux"), feature = "voice-input"))]
mod voice;
#[cfg(all(not(target_os = "linux"), not(feature = "voice-input")))]
mod voice {
    //! Stub voice module used when the `voice-input` feature is disabled.
    //!
    //! Mirrors the real `voice` module's API so callers compile unchanged,
    //! but every entry point reports that voice input is unavailable.
    use crate::app_event::AppEvent;
    use crate::app_event_sender::AppEventSender;
    use std::sync::Arc;
    use std::sync::Mutex;
    use std::sync::atomic::AtomicBool;
    use std::sync::atomic::AtomicU16;

    /// Mirrors the real `RecordedAudio`; never actually produced here.
    pub struct RecordedAudio {
        pub data: Vec<i16>,
        pub sample_rate: u32,
        pub channels: u16,
    }

    /// Stub capture handle; `start` always fails, so no instance exists.
    pub struct VoiceCapture;

    /// Stub meter state that renders a constant "quiet" meter.
    pub(crate) struct RecordingMeterState;

    impl VoiceCapture {
        /// Always fails: this build has no audio capture support.
        pub fn start() -> Result<Self, String> {
            Err("voice input is unavailable in this build".to_string())
        }
        /// Unreachable in practice (no instance can exist); API parity only.
        pub fn stop(self) -> Result<RecordedAudio, String> {
            Err("voice input is unavailable in this build".to_string())
        }
        /// Returns an empty, disconnected buffer (API parity only).
        pub fn data_arc(&self) -> Arc<Mutex<Vec<i16>>> {
            Arc::new(Mutex::new(Vec::new()))
        }
        /// Reports "already stopped" so any meter task exits immediately.
        pub fn stopped_flag(&self) -> Arc<AtomicBool> {
            Arc::new(AtomicBool::new(true))
        }
        pub fn sample_rate(&self) -> u32 {
            0
        }
        pub fn channels(&self) -> u16 {
            0
        }
        pub fn last_peak_arc(&self) -> Arc<AtomicU16> {
            Arc::new(AtomicU16::new(0))
        }
    }

    impl RecordingMeterState {
        pub(crate) fn new() -> Self {
            Self
        }
        /// Always renders the flat/quiet 4-cell meter.
        pub(crate) fn next_text(&mut self, _peak: u16) -> String {
            "⠤⠤⠤⠤".to_string()
        }
    }

    /// Immediately reports failure instead of transcribing.
    pub fn transcribe_async(
        id: String,
        _audio: RecordedAudio,
        _context: Option<String>,
        tx: AppEventSender,
    ) {
        tx.send(AppEvent::TranscriptionFailed {
            id,
            error: "voice input is unavailable in this build".to_string(),
        });
    }
}
mod wrapping;
#[cfg(test)]

517
codex-rs/tui/src/voice.rs Normal file
View file

@ -0,0 +1,517 @@
use crate::app_event::AppEvent;
use crate::app_event_sender::AppEventSender;
use codex_core::auth::AuthCredentialsStoreMode;
use codex_core::config::Config;
use codex_core::config::find_codex_home;
use codex_core::default_client::get_codex_user_agent;
use codex_login::AuthMode;
use codex_login::CodexAuth;
use cpal::traits::DeviceTrait;
use cpal::traits::HostTrait;
use cpal::traits::StreamTrait;
use hound::SampleFormat;
use hound::WavSpec;
use hound::WavWriter;
use std::collections::VecDeque;
use std::io::Cursor;
use std::sync::Arc;
use std::sync::Mutex;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicU16;
use std::sync::atomic::Ordering;
use tracing::error;
use tracing::info;
use tracing::trace;
/// Resolved credential/endpoint info needed to authenticate a transcription
/// request, derived from the local Codex auth state.
struct TranscriptionAuthContext {
    // Which auth flow is active (ChatGPT vs API key); selects the endpoint.
    mode: AuthMode,
    // Token sent as the `Authorization: Bearer` header.
    bearer_token: String,
    // ChatGPT account id header value, when the ChatGPT flow provides one.
    chatgpt_account_id: Option<String>,
    // Normalized ChatGPT backend base URL (see `normalize_chatgpt_base_url`).
    chatgpt_base_url: String,
}
/// A finished recording: 16-bit PCM samples plus the stream parameters
/// needed to interpret and encode them.
pub struct RecordedAudio {
    // Captured samples (interleaved when `channels > 1`).
    pub data: Vec<i16>,
    pub sample_rate: u32,
    pub channels: u16,
}
/// An active microphone capture session. Dropping (or `stop`ping) the
/// contained cpal stream ends capture.
pub struct VoiceCapture {
    // Input stream; `None` after `stop`. Dropping it stops capture.
    stream: Option<cpal::Stream>,
    sample_rate: u32,
    channels: u16,
    // Samples appended by the audio callback as they arrive.
    data: Arc<Mutex<Vec<i16>>>,
    // Set on `stop` so metering tasks can exit.
    stopped: Arc<AtomicBool>,
    // Most recent per-buffer peak level, for the live UI meter.
    last_peak: Arc<AtomicU16>,
}
impl VoiceCapture {
    /// Opens the host's default input device and begins capturing
    /// immediately. Samples are appended (as i16) to the shared `data`
    /// buffer by the stream callback; `last_peak` tracks each buffer's peak
    /// level for the UI meter.
    pub fn start() -> Result<Self, String> {
        let (device, config) = select_input_device_and_config()?;
        let sample_rate = config.sample_rate().0;
        let channels = config.channels();
        let data: Arc<Mutex<Vec<i16>>> = Arc::new(Mutex::new(Vec::new()));
        let stopped = Arc::new(AtomicBool::new(false));
        let last_peak = Arc::new(AtomicU16::new(0));
        let stream = build_input_stream(&device, &config, data.clone(), last_peak.clone())?;
        stream
            .play()
            .map_err(|e| format!("failed to start input stream: {e}"))?;
        Ok(Self {
            stream: Some(stream),
            sample_rate,
            channels,
            data,
            stopped,
            last_peak,
        })
    }

    /// Ends capture and returns a snapshot of everything recorded so far.
    pub fn stop(mut self) -> Result<RecordedAudio, String> {
        // Mark stopped so any metering task can exit cleanly.
        self.stopped.store(true, Ordering::SeqCst);
        // Dropping the stream stops capture.
        self.stream.take();
        let data = self
            .data
            .lock()
            .map_err(|_| "failed to lock audio buffer".to_string())?
            .clone();
        Ok(RecordedAudio {
            data,
            sample_rate: self.sample_rate,
            channels: self.channels,
        })
    }

    /// Shared handle to the live sample buffer (filled by the callback).
    pub fn data_arc(&self) -> Arc<Mutex<Vec<i16>>> {
        self.data.clone()
    }

    /// Flag that flips to `true` once `stop` runs; metering tasks poll it.
    pub fn stopped_flag(&self) -> Arc<AtomicBool> {
        self.stopped.clone()
    }

    pub fn sample_rate(&self) -> u32 {
        self.sample_rate
    }

    pub fn channels(&self) -> u16 {
        self.channels
    }

    /// Shared handle to the most recent per-buffer peak (for the UI meter).
    pub fn last_peak_arc(&self) -> Arc<AtomicU16> {
        self.last_peak.clone()
    }
}
/// State for the 4-cell braille level meter shown while recording.
pub(crate) struct RecordingMeterState {
    // Last four rendered meter cells (fixed-width scrolling window).
    history: VecDeque<char>,
    // Slow EMA estimate of the ambient noise floor.
    noise_ema: f64,
    // Peak envelope (fast attack, slow release).
    env: f64,
}
impl RecordingMeterState {
    /// Creates a meter primed with four "quiet" cells so the rendered meter
    /// has a stable width from the very first frame.
    pub(crate) fn new() -> Self {
        Self {
            history: VecDeque::from(['⠤'; 4]),
            noise_ema: 0.02,
            env: 0.0,
        }
    }

    /// Converts the most recent peak sample into a 4-character braille
    /// meter string.
    ///
    /// The envelope follows peaks quickly (ATTACK) and decays slowly
    /// (RELEASE); a slow-moving noise-floor estimate (`noise_ema`) keeps the
    /// meter responsive relative to ambient level rather than absolute level.
    pub(crate) fn next_text(&mut self, peak: u16) -> String {
        const SYMBOLS: [char; 7] = ['⠤', '⠴', '⠶', '⠷', '⡷', '⡿', '⣿'];
        const ALPHA_NOISE: f64 = 0.05;
        const ATTACK: f64 = 0.80;
        const RELEASE: f64 = 0.25;
        let latest_peak = peak as f64 / (i16::MAX as f64);
        // Asymmetric envelope follower: fast attack, slow release.
        if latest_peak > self.env {
            self.env = ATTACK * latest_peak + (1.0 - ATTACK) * self.env;
        } else {
            self.env = RELEASE * latest_peak + (1.0 - RELEASE) * self.env;
        }
        let rms_approx = self.env * 0.7;
        // Track the ambient noise floor with a slow EMA; clamp the reference
        // so silence never causes a divide-by-~zero below.
        self.noise_ema = (1.0 - ALPHA_NOISE) * self.noise_ema + ALPHA_NOISE * rms_approx;
        let ref_level = self.noise_ema.max(0.01);
        let fast_signal = 0.8 * latest_peak + 0.2 * self.env;
        let target = 2.0f64;
        let raw = (fast_signal / (ref_level * target)).max(0.0);
        // Logarithmic compression so small signals still move the meter.
        let k = 1.6f64;
        let compressed = (raw.ln_1p() / k.ln_1p()).min(1.0);
        let idx = (compressed * (SYMBOLS.len() as f64 - 1.0))
            .round()
            .clamp(0.0, SYMBOLS.len() as f64 - 1.0) as usize;
        let level_char = SYMBOLS[idx];
        // Keep a fixed 4-cell scrolling window of recent levels.
        if self.history.len() >= 4 {
            self.history.pop_front();
        }
        self.history.push_back(level_char);
        // `collect` sizes the String correctly; braille cells are 3 UTF-8
        // bytes each, so the previous `String::with_capacity(4)` was
        // under-reserved and always reallocated.
        self.history.iter().collect()
    }
}
/// Transcribes `audio` on a dedicated OS thread, reporting the result as an
/// `AppEvent` (`TranscriptionComplete` or `TranscriptionFailed`) tagged with
/// the placeholder `id`.
///
/// `context` is forwarded as a transcription prompt where the API supports
/// it. Clips shorter than one second are rejected up front.
pub fn transcribe_async(
    id: String,
    audio: RecordedAudio,
    context: Option<String>,
    tx: AppEventSender,
) {
    std::thread::spawn(move || {
        // Enforce minimum duration to avoid garbage outputs.
        const MIN_DURATION_SECONDS: f32 = 1.0;
        let duration_seconds = clip_duration_seconds(&audio);
        if duration_seconds < MIN_DURATION_SECONDS {
            let msg = format!(
                "recording too short ({duration_seconds:.2}s); minimum is {MIN_DURATION_SECONDS:.2}s"
            );
            info!("{msg}");
            tx.send(AppEvent::TranscriptionFailed { id, error: msg });
            return;
        }
        // Encode entire clip as normalized WAV.
        let wav_bytes = match encode_wav_normalized(&audio) {
            Ok(b) => b,
            Err(e) => {
                error!("failed to encode wav: {e}");
                tx.send(AppEvent::TranscriptionFailed { id, error: e });
                return;
            }
        };
        // Run the HTTP request on a small, dedicated runtime.
        // NOTE(review): if runtime creation fails we log but send no
        // TranscriptionFailed event, so the placeholder never resolves —
        // confirm this path is acceptable.
        let rt = match tokio::runtime::Runtime::new() {
            Ok(rt) => rt,
            Err(e) => {
                error!("failed to create tokio runtime: {e}");
                return;
            }
        };
        let tx2 = tx.clone();
        let id2 = id.clone();
        let res: Result<String, String> = rt
            .block_on(async move { transcribe_bytes(wav_bytes, context, duration_seconds).await });
        match res {
            Ok(text) => {
                tx2.send(AppEvent::TranscriptionComplete { id: id2, text });
                info!("voice transcription succeeded");
            }
            Err(e) => {
                error!("voice transcription error: {e}");
                tx.send(AppEvent::TranscriptionFailed { id, error: e });
            }
        }
    });
}
// -------------------------
// Voice input helpers
// -------------------------
/// Picks the host's default input device and its default stream config,
/// with user-presentable error strings on failure.
fn select_input_device_and_config() -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
    let host = cpal::default_host();
    let device = host
        .default_input_device()
        .ok_or_else(|| "no input audio device available".to_string())?;
    let config = device
        .default_input_config()
        .map_err(|e| format!("failed to get default input config: {e}"))?;
    Ok((device, config))
}
/// Builds a cpal input stream for the device's sample format (F32/I16/U16),
/// converting every buffer to i16 into `data` and publishing each buffer's
/// peak level into `last_peak`. Other sample formats are rejected.
fn build_input_stream(
    device: &cpal::Device,
    config: &cpal::SupportedStreamConfig,
    data: Arc<Mutex<Vec<i16>>>,
    last_peak: Arc<AtomicU16>,
) -> Result<cpal::Stream, String> {
    match config.sample_format() {
        cpal::SampleFormat::F32 => device
            .build_input_stream(
                &config.clone().into(),
                move |input: &[f32], _| {
                    let peak = peak_f32(input);
                    last_peak.store(peak, Ordering::Relaxed);
                    // `if let Ok` skips a poisoned mutex instead of panicking
                    // inside the audio callback.
                    if let Ok(mut buf) = data.lock() {
                        for &s in input {
                            buf.push(f32_to_i16(s));
                        }
                    }
                },
                move |err| error!("audio input error: {err}"),
                None,
            )
            .map_err(|e| format!("failed to build input stream: {e}")),
        cpal::SampleFormat::I16 => device
            .build_input_stream(
                &config.clone().into(),
                move |input: &[i16], _| {
                    let peak = peak_i16(input);
                    last_peak.store(peak, Ordering::Relaxed);
                    if let Ok(mut buf) = data.lock() {
                        buf.extend_from_slice(input);
                    }
                },
                move |err| error!("audio input error: {err}"),
                None,
            )
            .map_err(|e| format!("failed to build input stream: {e}")),
        cpal::SampleFormat::U16 => device
            .build_input_stream(
                &config.clone().into(),
                move |input: &[u16], _| {
                    // NOTE(review): unlike the F32/I16 arms, the peak here is
                    // only stored when the lock succeeds — confirm that's
                    // intended rather than an oversight.
                    if let Ok(mut buf) = data.lock() {
                        let peak = convert_u16_to_i16_and_peak(input, &mut buf);
                        last_peak.store(peak, Ordering::Relaxed);
                    }
                },
                move |err| error!("audio input error: {err}"),
                None,
            )
            .map_err(|e| format!("failed to build input stream: {e}")),
        _ => Err("unsupported input sample format".to_string()),
    }
}
/// Maps an f32 sample's magnitude (clamped to [0, 1]) onto the u16 peak
/// scale (0..=32767) used by the level meter.
#[inline]
fn f32_abs_to_u16(x: f32) -> u16 {
    let magnitude = x.abs().min(1.0);
    let scaled = (magnitude * i16::MAX as f32) as i32;
    scaled.max(0) as u16
}
/// Converts one f32 sample (clamped to [-1, 1]) to a 16-bit PCM sample.
#[inline]
fn f32_to_i16(s: f32) -> i16 {
    let bounded = s.clamp(-1.0, 1.0);
    (bounded * i16::MAX as f32) as i16
}
/// Returns the largest absolute sample in `input`, scaled to the u16 peak
/// representation used by the level meter (0 for an empty buffer).
fn peak_f32(input: &[f32]) -> u16 {
    let max_abs = input.iter().fold(0.0_f32, |acc, &s| acc.max(s.abs()));
    f32_abs_to_u16(max_abs)
}
/// Largest absolute sample value in `input` (0 for an empty slice).
/// `i16::MIN` maps to 32768, which still fits in the u16 return type.
fn peak_i16(input: &[i16]) -> u16 {
    input
        .iter()
        .map(|&s| (s as i32).abs())
        .max()
        .unwrap_or(0) as u16
}
/// Converts unsigned 16-bit samples (midpoint 32768) to signed PCM,
/// appending them to `out`, and returns the chunk's peak magnitude.
fn convert_u16_to_i16_and_peak(input: &[u16], out: &mut Vec<i16>) -> u16 {
    let mut peak: i32 = 0;
    for &raw in input {
        // Shift the unsigned midpoint down to zero.
        let centered = (raw as i32 - 32768) as i16;
        peak = peak.max((centered as i32).abs());
        out.push(centered);
    }
    peak as u16
}
// -------------------------
// Transcription helpers
// -------------------------
/// Length of the clip in seconds, derived from the interleaved sample
/// count, sample rate, and channel count. Returns 0.0 when rate or channel
/// count is zero (avoids NaN from 0/0).
fn clip_duration_seconds(audio: &RecordedAudio) -> f32 {
    let sample_count = audio.data.len() as f32;
    let interleaved_rate = (audio.sample_rate as f32) * (audio.channels as f32);
    if interleaved_rate > 0.0 {
        sample_count / interleaved_rate
    } else {
        0.0
    }
}
/// Encodes the clip as 16-bit PCM WAV, applying simple peak normalization
/// to 90% of full scale so quiet recordings remain audible to the
/// transcription service. Returns the complete WAV file bytes.
fn encode_wav_normalized(audio: &RecordedAudio) -> Result<Vec<u8>, String> {
    let mut wav_bytes: Vec<u8> = Vec::new();
    let spec = WavSpec {
        channels: audio.channels,
        sample_rate: audio.sample_rate,
        bits_per_sample: 16,
        sample_format: SampleFormat::Int,
    };
    let mut cursor = Cursor::new(&mut wav_bytes);
    let mut writer =
        WavWriter::new(&mut cursor, spec).map_err(|_| "failed to create wav writer".to_string())?;
    // Simple peak normalization with headroom to improve audibility on quiet inputs.
    let segment = &audio.data[..];
    // Track the signed sample with the largest magnitude.
    let mut peak: i16 = 0;
    for &s in segment {
        let a = s.unsigned_abs();
        if a > peak.unsigned_abs() {
            peak = s;
        }
    }
    let peak_abs = (peak as i32).unsigned_abs() as i32;
    let target = (i16::MAX as f32) * 0.9; // leave some headroom
    // A silent clip (peak 0) is written through unscaled.
    let gain: f32 = if peak_abs > 0 {
        target / (peak_abs as f32)
    } else {
        1.0
    };
    for &s in segment {
        let v = ((s as f32) * gain)
            .round()
            .clamp(i16::MIN as f32, i16::MAX as f32) as i16;
        writer
            .write_sample(v)
            .map_err(|_| "failed writing wav sample".to_string())?;
    }
    writer
        .finalize()
        .map_err(|_| "failed to finalize wav".to_string())?;
    Ok(wav_bytes)
}
/// Normalizes a configured ChatGPT base URL: strips trailing slashes and,
/// for known ChatGPT hosts, appends `/backend-api` when it is missing.
/// Non-ChatGPT URLs pass through (minus trailing slashes).
fn normalize_chatgpt_base_url(input: &str) -> String {
    let trimmed = input.trim_end_matches('/');
    let is_chatgpt_host = trimmed.starts_with("https://chatgpt.com")
        || trimmed.starts_with("https://chat.openai.com");
    if is_chatgpt_host && !trimmed.contains("/backend-api") {
        format!("{trimmed}/backend-api")
    } else {
        trimmed.to_string()
    }
}
/// Resolves local Codex credentials plus the (normalized) ChatGPT base URL
/// into a `TranscriptionAuthContext`. Fails with a user-actionable message
/// when no auth is configured.
async fn resolve_auth() -> Result<TranscriptionAuthContext, String> {
    let codex_home = find_codex_home().map_err(|e| format!("failed to find codex home: {e}"))?;
    let auth = CodexAuth::from_auth_storage(&codex_home, AuthCredentialsStoreMode::Auto)
        .map_err(|e| format!("failed to read auth.json: {e}"))?
        .ok_or_else(|| "No Codex auth is configured; please run `codex login`".to_string())?;
    let chatgpt_account_id = auth.get_account_id();
    let token = auth
        .get_token()
        .map_err(|e| format!("failed to get auth token: {e}"))?;
    let config = Config::load_with_cli_overrides(Vec::new())
        .await
        .map_err(|e| format!("failed to load config: {e}"))?;
    Ok(TranscriptionAuthContext {
        mode: auth.api_auth_mode(),
        bearer_token: token,
        chatgpt_account_id,
        chatgpt_base_url: normalize_chatgpt_base_url(&config.chatgpt_base_url),
    })
}
/// Sends recorded WAV audio to a transcription backend and returns the
/// recognized text.
///
/// Routing depends on the resolved auth mode:
/// - ChatGPT auth posts the audio to `{chatgpt_base_url}/transcribe`,
///   attaching the `ChatGPT-Account-Id` header when an account id is
///   known. NOTE(review): `context` is not forwarded on this path —
///   confirm the backend has no prompt parameter.
/// - API-key auth posts to the OpenAI `/v1/audio/transcriptions`
///   endpoint with the `gpt-4o-transcribe` model, passing `context` as
///   the `prompt` field when present.
///
/// `duration_seconds` is used only for trace logging. Errors (auth,
/// network, non-2xx HTTP, JSON parse, or an empty transcription) are
/// returned as human-readable strings.
async fn transcribe_bytes(
    wav_bytes: Vec<u8>,
    context: Option<String>,
    duration_seconds: f32,
) -> Result<String, String> {
    let auth = resolve_auth().await?;
    let client = reqwest::Client::new();
    let audio_bytes = wav_bytes.len();
    let prompt_for_log = context.as_deref().unwrap_or("").to_string();
    // Both backends take the audio as a multipart file part; build it once
    // instead of duplicating the construction in each branch.
    let part = reqwest::multipart::Part::bytes(wav_bytes)
        .file_name("audio.wav")
        .mime_str("audio/wav")
        .map_err(|e| format!("failed to set mime: {e}"))?;
    let (endpoint, request) =
        if matches!(auth.mode, AuthMode::Chatgpt | AuthMode::ChatgptAuthTokens) {
            let form = reqwest::multipart::Form::new().part("file", part);
            let endpoint = format!("{}/transcribe", auth.chatgpt_base_url);
            let mut req = client
                .post(&endpoint)
                .bearer_auth(&auth.bearer_token)
                .multipart(form)
                .header("User-Agent", get_codex_user_agent());
            if let Some(acc) = auth.chatgpt_account_id {
                req = req.header("ChatGPT-Account-Id", acc);
            }
            (endpoint, req)
        } else {
            let mut form = reqwest::multipart::Form::new()
                .text("model", "gpt-4o-transcribe")
                .part("file", part);
            if let Some(context) = context {
                form = form.text("prompt", context);
            }
            let endpoint = "https://api.openai.com/v1/audio/transcriptions".to_string();
            let req = client
                .post(&endpoint)
                .bearer_auth(&auth.bearer_token)
                .multipart(form)
                .header("User-Agent", get_codex_user_agent());
            (endpoint, req)
        };
    let audio_kib = audio_bytes as f32 / 1024.0;
    let mode = auth.mode;
    trace!(
        "sending transcription request: mode={mode:?} endpoint={endpoint} duration={duration_seconds:.2}s audio={audio_kib:.1}KiB prompt={prompt_for_log}"
    );
    let resp = request
        .send()
        .await
        .map_err(|e| format!("transcription request failed: {e}"))?;
    if !resp.status().is_success() {
        let status = resp.status();
        let body = resp
            .text()
            .await
            .unwrap_or_else(|_| "<failed to read body>".to_string());
        return Err(format!("transcription failed: {status} {body}"));
    }
    let v: serde_json::Value = resp
        .json()
        .await
        .map_err(|e| format!("failed to parse json: {e}"))?;
    let text = v
        .get("text")
        .and_then(|t| t.as_str())
        .unwrap_or("")
        .to_string();
    if text.is_empty() {
        Err("empty transcription result".to_string())
    } else {
        Ok(text)
    }
}

View file

@ -71,15 +71,3 @@ index 6ffc9f7..e02089a 100644
],
}),
)
diff --git a/toolchain/llvm/llvm.bzl b/toolchain/llvm/llvm.bzl
index d068085..c152552 100644
--- a/toolchain/llvm/llvm.bzl
+++ b/toolchain/llvm/llvm.bzl
@@ -7,6 +7,7 @@ def declare_llvm_targets(*, suffix = ""):
name = "builtin_headers",
# Grab whichever version-specific dir is there.
path = native.glob(["lib/clang/*"], exclude_directories = 0)[0] + "/include",
+ visibility = ["//visibility:public"],
)
# Convenient exports