From 91bf1538fc8962ecc94ccbece20b9a265a3f6419 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Sat, 16 Nov 2024 11:57:52 +0000 Subject: [PATCH] Receive: Config of decode sample rate/channels (#265) * Receive: Config of decode sample rate/channels This PR allows for dynamic configuration of the output sample rate and channel count of received Opus audio. Users who rely on supported formats should no longer need to manually resample & downmix audio decoded from SSRCs in a call. Opus exposes tuples of (Mono, Stereo) x (8, 12, 16, 24, 48)kHz. Changing this at runtime (mid-call) may cause some audio glitches, as decoder state must be reconstructed from scratch for all affected SSRCs. * Fix doc typo, consistent naming with MixMode. --- src/config.rs | 36 +++++++++++++- src/driver/decode_mode.rs | 69 ++++++++++++++++++++++++++- src/driver/mod.rs | 2 +- src/driver/tasks/udp_rx/mod.rs | 6 +++ src/driver/tasks/udp_rx/ssrc_state.rs | 23 +++++++-- src/events/context/data/voice.rs | 9 +++- 6 files changed, 134 insertions(+), 11 deletions(-) diff --git a/src/config.rs b/src/config.rs index e4d63a26b..1a2b07f04 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,5 +1,5 @@ -#[cfg(feature = "receive")] -use crate::driver::DecodeMode; +#[cfg(all(feature = "driver", feature = "receive"))] +use crate::driver::{Channels, DecodeMode, SampleRate}; #[cfg(feature = "driver")] use crate::{ driver::{ @@ -61,6 +61,18 @@ pub struct Config { /// [User speaking state]: crate::events::CoreEvent::VoiceTick pub decode_mode: DecodeMode, + #[cfg(all(feature = "driver", feature = "receive"))] + /// Configures the channel layout for output audio when using [`DecodeMode::Decode`]. + /// + /// Defaults to [`Channels::Stereo`]. + pub decode_channels: Channels, + + #[cfg(all(feature = "driver", feature = "receive"))] + /// Configures the sample rate for output audio when using [`DecodeMode::Decode`]. + /// + /// Defaults to [`SampleRate::Hz48000`]. 
+ pub decode_sample_rate: SampleRate, + #[cfg(all(feature = "driver", feature = "receive"))] /// Configures the amount of time after a user/SSRC is inactive before their decoder state /// should be removed. @@ -215,6 +227,10 @@ impl Default for Config { #[cfg(all(feature = "driver", feature = "receive"))] decode_mode: DecodeMode::Decrypt, #[cfg(all(feature = "driver", feature = "receive"))] + decode_channels: Channels::Stereo, + #[cfg(all(feature = "driver", feature = "receive"))] + decode_sample_rate: SampleRate::Hz48000, + #[cfg(all(feature = "driver", feature = "receive"))] decode_state_timeout: Duration::from_secs(60), #[cfg(all(feature = "driver", feature = "receive"))] playout_buffer_length: NonZeroUsize::new(5).unwrap(), @@ -267,6 +283,22 @@ impl Config { self } + #[cfg(feature = "receive")] + /// Sets this `Config`'s channel layout for output audio when using [`DecodeMode::Decode`] + #[must_use] + pub fn decode_channels(mut self, decode_channels: Channels) -> Self { + self.decode_channels = decode_channels; + self + } + + #[cfg(feature = "receive")] + /// Sets this `Config`'s sample rate for output audio when using [`DecodeMode::Decode`] + #[must_use] + pub fn decode_sample_rate(mut self, decode_sample_rate: SampleRate) -> Self { + self.decode_sample_rate = decode_sample_rate; + self + } + #[cfg(feature = "receive")] /// Sets this `Config`'s received packet decoder cleanup timer. #[must_use] diff --git a/src/driver/decode_mode.rs b/src/driver/decode_mode.rs index a3b602b48..8b8976e0a 100644 --- a/src/driver/decode_mode.rs +++ b/src/driver/decode_mode.rs @@ -1,5 +1,7 @@ +use audiopus::{Channels as OpusChannels, SampleRate as OpusRate}; + /// Decode behaviour for received RTP packets within the driver. 
-#[derive(Clone, Copy, Debug, Eq, PartialEq)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, Hash)] #[non_exhaustive] pub enum DecodeMode { /// Packets received from Discord are handed over to events without any @@ -24,3 +26,68 @@ impl DecodeMode { self != DecodeMode::Pass } } + +/// The channel layout of output audio when using [`DecodeMode::Decode`]. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash)] +#[non_exhaustive] +pub enum Channels { + /// Decode received audio packets into a single channel. + Mono, + /// Decode received audio packets into two interleaved channels. + /// + /// Received mono packets' samples will automatically be duplicated across + /// both channels. + /// + /// The default choice. + #[default] + Stereo, +} + +impl Channels { + pub(crate) fn channels(self) -> usize { + match self { + Channels::Mono => 1, + Channels::Stereo => 2, + } + } +} + +impl From<Channels> for OpusChannels { + fn from(value: Channels) -> Self { + match value { + Channels::Mono => OpusChannels::Mono, + Channels::Stereo => OpusChannels::Stereo, + } + } +} + +/// The sample rate of output audio when using [`DecodeMode::Decode`]. +#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash)] +#[non_exhaustive] +pub enum SampleRate { + /// Decode to a sample rate of 8kHz. + Hz8000, + /// Decode to a sample rate of 12kHz. + Hz12000, + /// Decode to a sample rate of 16kHz. + Hz16000, + /// Decode to a sample rate of 24kHz. + Hz24000, + /// Decode to a sample rate of 48kHz. + /// + /// The preferred option for encoding/decoding at or above CD quality. 
+ #[default] + Hz48000, +} + +impl From<SampleRate> for OpusRate { + fn from(value: SampleRate) -> Self { + match value { + SampleRate::Hz8000 => OpusRate::Hz8000, + SampleRate::Hz12000 => OpusRate::Hz12000, + SampleRate::Hz16000 => OpusRate::Hz16000, + SampleRate::Hz24000 => OpusRate::Hz24000, + SampleRate::Hz48000 => OpusRate::Hz48000, + } + } +} diff --git a/src/driver/mod.rs b/src/driver/mod.rs index ba98ca193..fab61f08b 100644 --- a/src/driver/mod.rs +++ b/src/driver/mod.rs @@ -28,7 +28,7 @@ use connection::error::{Error, Result}; pub use crypto::CryptoMode; pub(crate) use crypto::CryptoState; #[cfg(feature = "receive")] -pub use decode_mode::DecodeMode; +pub use decode_mode::*; pub use mix_mode::MixMode; pub use scheduler::{ Config as SchedulerConfig, diff --git a/src/driver/tasks/udp_rx/mod.rs b/src/driver/tasks/udp_rx/mod.rs index b2220c869..f67f5d75e 100644 --- a/src/driver/tasks/udp_rx/mod.rs +++ b/src/driver/tasks/udp_rx/mod.rs @@ -66,7 +66,13 @@ impl UdpRx { *interconnect = i; }, Ok(UdpRxMessage::SetConfig(c)) => { + let old_coder = (self.config.decode_channels, self.config.decode_sample_rate); + let new_coder = (c.decode_channels, c.decode_sample_rate); self.config = c; + + if old_coder != new_coder { + self.decoder_map.values_mut().for_each(|v| v.reconfigure_decoder(&self.config)); + } }, Err(flume::RecvError::Disconnected) => break, } diff --git a/src/driver/tasks/udp_rx/ssrc_state.rs b/src/driver/tasks/udp_rx/ssrc_state.rs index 2bcd4b3d2..87b6f1b88 100644 --- a/src/driver/tasks/udp_rx/ssrc_state.rs +++ b/src/driver/tasks/udp_rx/ssrc_state.rs @@ -1,8 +1,8 @@ use super::*; use crate::{ - constants::*, driver::{ tasks::error::{Error, Result}, + Channels, DecodeMode, }, events::context_data::{RtpData, VoiceData}, @@ -11,7 +11,6 @@ use audiopus::{ coder::Decoder as OpusDecoder, error::{Error as OpusError, ErrorCode}, packet::Packet as OpusPacket, - Channels, }; use discortp::{rtp::RtpExtensionPacket, Packet, PacketSize}; use tracing::{error, warn}; @@ -24,6 +23,7 
@@ pub struct SsrcState { decode_size: PacketDecodeSize, pub(crate) prune_time: Instant, pub(crate) disconnected: bool, + channels: Channels, } impl SsrcState { @@ -33,14 +33,27 @@ impl SsrcState { Self { playout_buffer: PlayoutBuffer::new(playout_capacity, pkt.get_sequence().0), crypto_mode, - decoder: OpusDecoder::new(SAMPLE_RATE, Channels::Stereo) - .expect("Failed to create new Opus decoder for source."), + decoder: OpusDecoder::new( + config.decode_sample_rate.into(), + config.decode_channels.into(), + ) + .expect("Failed to create new Opus decoder for source."), decode_size: PacketDecodeSize::TwentyMillis, prune_time: Instant::now() + config.decode_state_timeout, disconnected: false, + channels: config.decode_channels, } } + pub fn reconfigure_decoder(&mut self, config: &Config) { + self.decoder = OpusDecoder::new( + config.decode_sample_rate.into(), + config.decode_channels.into(), + ) + .expect("Failed to create new Opus decoder for source."); + self.channels = config.decode_channels; + } + pub fn store_packet(&mut self, packet: StoredPacket, config: &Config) { self.playout_buffer.store_packet(packet, config); } @@ -160,7 +173,7 @@ impl SsrcState { Ok(audio_len) => { // Decoding to stereo: audio_len refers to sample count irrespective of channel count. // => multiply by number of channels. - out.truncate(2 * audio_len); + out.truncate(self.channels.channels() * audio_len); break; }, diff --git a/src/events/context/data/voice.rs b/src/events/context/data/voice.rs index 92dc70d11..b953b5135 100644 --- a/src/events/context/data/voice.rs +++ b/src/events/context/data/voice.rs @@ -30,9 +30,14 @@ pub struct VoiceData { pub packet: Option<RtpData>, /// PCM audio obtained from a user. /// - /// Valid audio data (`Some(audio)` where `audio.len >= 0`) typically contains 20ms of 16-bit stereo PCM audio - /// at 48kHz, using native endianness. Channels are interleaved (i.e., `L, R, L, R, ...`). 
+ /// Valid audio data (`Some(audio)` where `audio.len >= 0`) typically contains 20ms of 16-bit PCM audio + /// using native endianness. This defaults to stereo audio at 48kHz, and can be configured via + /// [`Config::decode_channels`] and [`Config::decode_sample_rate`] -- channels are interleaved + /// (i.e., `L, R, L, R, ...`) if stereo. /// /// This value will be `None` if Songbird is not configured to decode audio. + /// + /// [`Config::decode_channels`]: crate::Config::decode_channels + /// [`Config::decode_sample_rate`]: crate::Config::decode_sample_rate pub decoded_voice: Option<Vec<i16>>, }