## Summary - switch TUI push-to-talk transcription requests to `gpt-4o-mini-transcribe` - prefer 24 kHz mono `i16` microphone configs and normalize voice input to 24 kHz mono before upload/send - add unit coverage for the new downmix/resample path ## Testing - `just fmt` - `cargo test -p codex-tui`
176 lines
6.3 KiB
Rust
176 lines
6.3 KiB
Rust
use codex_core::config::Config;
|
|
use cpal::traits::DeviceTrait;
|
|
use cpal::traits::HostTrait;
|
|
use tracing::warn;
|
|
|
|
use crate::app_event::RealtimeAudioDeviceKind;
|
|
|
|
const PREFERRED_INPUT_SAMPLE_RATE: u32 = 24_000;
|
|
const PREFERRED_INPUT_CHANNELS: u16 = 1;
|
|
|
|
pub(crate) fn list_realtime_audio_device_names(
|
|
kind: RealtimeAudioDeviceKind,
|
|
) -> Result<Vec<String>, String> {
|
|
let host = cpal::default_host();
|
|
let mut device_names = Vec::new();
|
|
for device in devices(&host, kind)? {
|
|
let Ok(name) = device.name() else {
|
|
continue;
|
|
};
|
|
if !device_names.contains(&name) {
|
|
device_names.push(name);
|
|
}
|
|
}
|
|
Ok(device_names)
|
|
}
|
|
|
|
pub(crate) fn select_configured_input_device_and_config(
|
|
config: &Config,
|
|
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
|
select_device_and_config(RealtimeAudioDeviceKind::Microphone, config)
|
|
}
|
|
|
|
pub(crate) fn select_configured_output_device_and_config(
|
|
config: &Config,
|
|
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
|
select_device_and_config(RealtimeAudioDeviceKind::Speaker, config)
|
|
}
|
|
|
|
pub(crate) fn preferred_input_config(
|
|
device: &cpal::Device,
|
|
) -> Result<cpal::SupportedStreamConfig, String> {
|
|
let supported_configs = device
|
|
.supported_input_configs()
|
|
.map_err(|err| format!("failed to enumerate input audio configs: {err}"))?;
|
|
|
|
supported_configs
|
|
.filter_map(|range| {
|
|
let sample_format_rank = match range.sample_format() {
|
|
cpal::SampleFormat::I16 => 0u8,
|
|
cpal::SampleFormat::U16 => 1u8,
|
|
cpal::SampleFormat::F32 => 2u8,
|
|
_ => return None,
|
|
};
|
|
let sample_rate = preferred_input_sample_rate(&range);
|
|
let sample_rate_penalty = sample_rate.0.abs_diff(PREFERRED_INPUT_SAMPLE_RATE);
|
|
let channel_penalty = range.channels().abs_diff(PREFERRED_INPUT_CHANNELS);
|
|
Some((
|
|
(sample_rate_penalty, channel_penalty, sample_format_rank),
|
|
range.with_sample_rate(sample_rate),
|
|
))
|
|
})
|
|
.min_by_key(|(score, _)| *score)
|
|
.map(|(_, config)| config)
|
|
.or_else(|| device.default_input_config().ok())
|
|
.ok_or_else(|| "failed to get default input config".to_string())
|
|
}
|
|
|
|
fn select_device_and_config(
|
|
kind: RealtimeAudioDeviceKind,
|
|
config: &Config,
|
|
) -> Result<(cpal::Device, cpal::SupportedStreamConfig), String> {
|
|
let host = cpal::default_host();
|
|
let configured_name = configured_name(kind, config);
|
|
let selected = configured_name
|
|
.and_then(|name| find_device_by_name(&host, kind, name))
|
|
.or_else(|| {
|
|
let default_device = default_device(&host, kind);
|
|
if let Some(name) = configured_name && default_device.is_some() {
|
|
warn!(
|
|
"configured {} audio device `{name}` was unavailable; falling back to system default",
|
|
kind.noun()
|
|
);
|
|
}
|
|
default_device
|
|
})
|
|
.ok_or_else(|| missing_device_error(kind, configured_name))?;
|
|
|
|
let stream_config = match kind {
|
|
RealtimeAudioDeviceKind::Microphone => preferred_input_config(&selected)?,
|
|
RealtimeAudioDeviceKind::Speaker => default_config(&selected, kind)?,
|
|
};
|
|
Ok((selected, stream_config))
|
|
}
|
|
|
|
fn configured_name(kind: RealtimeAudioDeviceKind, config: &Config) -> Option<&str> {
|
|
match kind {
|
|
RealtimeAudioDeviceKind::Microphone => config.realtime_audio.microphone.as_deref(),
|
|
RealtimeAudioDeviceKind::Speaker => config.realtime_audio.speaker.as_deref(),
|
|
}
|
|
}
|
|
|
|
fn find_device_by_name(
|
|
host: &cpal::Host,
|
|
kind: RealtimeAudioDeviceKind,
|
|
name: &str,
|
|
) -> Option<cpal::Device> {
|
|
let devices = devices(host, kind).ok()?;
|
|
devices
|
|
.into_iter()
|
|
.find(|device| device.name().ok().as_deref() == Some(name))
|
|
}
|
|
|
|
fn devices(host: &cpal::Host, kind: RealtimeAudioDeviceKind) -> Result<Vec<cpal::Device>, String> {
|
|
match kind {
|
|
RealtimeAudioDeviceKind::Microphone => host
|
|
.input_devices()
|
|
.map(|devices| devices.collect())
|
|
.map_err(|err| format!("failed to enumerate input audio devices: {err}")),
|
|
RealtimeAudioDeviceKind::Speaker => host
|
|
.output_devices()
|
|
.map(|devices| devices.collect())
|
|
.map_err(|err| format!("failed to enumerate output audio devices: {err}")),
|
|
}
|
|
}
|
|
|
|
fn default_device(host: &cpal::Host, kind: RealtimeAudioDeviceKind) -> Option<cpal::Device> {
|
|
match kind {
|
|
RealtimeAudioDeviceKind::Microphone => host.default_input_device(),
|
|
RealtimeAudioDeviceKind::Speaker => host.default_output_device(),
|
|
}
|
|
}
|
|
|
|
fn default_config(
|
|
device: &cpal::Device,
|
|
kind: RealtimeAudioDeviceKind,
|
|
) -> Result<cpal::SupportedStreamConfig, String> {
|
|
match kind {
|
|
RealtimeAudioDeviceKind::Microphone => device
|
|
.default_input_config()
|
|
.map_err(|err| format!("failed to get default input config: {err}")),
|
|
RealtimeAudioDeviceKind::Speaker => device
|
|
.default_output_config()
|
|
.map_err(|err| format!("failed to get default output config: {err}")),
|
|
}
|
|
}
|
|
|
|
fn preferred_input_sample_rate(range: &cpal::SupportedStreamConfigRange) -> cpal::SampleRate {
|
|
let min = range.min_sample_rate().0;
|
|
let max = range.max_sample_rate().0;
|
|
if (min..=max).contains(&PREFERRED_INPUT_SAMPLE_RATE) {
|
|
cpal::SampleRate(PREFERRED_INPUT_SAMPLE_RATE)
|
|
} else if PREFERRED_INPUT_SAMPLE_RATE < min {
|
|
cpal::SampleRate(min)
|
|
} else {
|
|
cpal::SampleRate(max)
|
|
}
|
|
}
|
|
|
|
fn missing_device_error(kind: RealtimeAudioDeviceKind, configured_name: Option<&str>) -> String {
|
|
match (kind, configured_name) {
|
|
(RealtimeAudioDeviceKind::Microphone, Some(name)) => {
|
|
format!(
|
|
"configured microphone `{name}` was unavailable and no default input audio device was found"
|
|
)
|
|
}
|
|
(RealtimeAudioDeviceKind::Speaker, Some(name)) => {
|
|
format!(
|
|
"configured speaker `{name}` was unavailable and no default output audio device was found"
|
|
)
|
|
}
|
|
(RealtimeAudioDeviceKind::Microphone, None) => {
|
|
"no input audio device available".to_string()
|
|
}
|
|
(RealtimeAudioDeviceKind::Speaker, None) => "no output audio device available".to_string(),
|
|
}
|
|
}
|