From 4f2415f26c84e976c377807b27cb99535ba5eb92 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Tue, 16 Jul 2024 23:39:50 +0300 Subject: [PATCH] Migrate audio pipeline to float from 16-bit int Float is the native format of opus codec and most if not all capture backends. --- src/audio.cpp | 6 +++--- src/platform/common.h | 2 +- src/platform/linux/audio.cpp | 11 ++++++----- src/platform/macos/av_audio.h | 2 +- src/platform/macos/av_audio.m | 4 ++-- src/platform/macos/microphone.mm | 10 +++++----- src/platform/windows/audio.cpp | 22 +++++++++++----------- 7 files changed, 29 insertions(+), 28 deletions(-) diff --git a/src/audio.cpp b/src/audio.cpp index ac1947fec74..b24ae61350f 100644 --- a/src/audio.cpp +++ b/src/audio.cpp @@ -18,7 +18,7 @@ namespace audio { using namespace std::literals; using opus_t = util::safe_ptr; - using sample_queue_t = std::shared_ptr>>; + using sample_queue_t = std::shared_ptr>>; struct audio_ctx_t { // We want to change the sink for the first stream only @@ -128,7 +128,7 @@ namespace audio { while (auto sample = samples->pop()) { buffer_t packet { 1400 }; - int bytes = opus_multistream_encode(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size()); + int bytes = opus_multistream_encode_float(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size()); if (bytes < 0) { BOOST_LOG(error) << "Couldn't encode audio: "sv << opus_strerror(bytes); packets->stop(); @@ -228,7 +228,7 @@ namespace audio { int samples_per_frame = frame_size * stream.channelCount; while (!shutdown_event->peek()) { - std::vector sample_buffer; + std::vector sample_buffer; sample_buffer.resize(samples_per_frame); auto status = mic->sample(sample_buffer); diff --git a/src/platform/common.h b/src/platform/common.h index 595b0cbecb0..e48acd60a04 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -525,7 +525,7 @@ namespace platf { class mic_t { public: virtual capture_e - sample(std::vector &frame_buffer) = 0; + sample(std::vector &frame_buffer) = 0; virtual ~mic_t() = default; }; diff --git a/src/platform/linux/audio.cpp b/src/platform/linux/audio.cpp index f742accbeec..32aa36a61b5 100644 --- a/src/platform/linux/audio.cpp +++ b/src/platform/linux/audio.cpp @@ -36,7 +36,7 @@ namespace platf { to_string(const char *name, const std::uint8_t *mapping, int channels) { std::stringstream ss; - ss << "rate=48000 sink_name="sv << name << " format=s16le channels="sv << channels << " channel_map="sv; + ss << "rate=48000 sink_name="sv << name << " format=float channels="sv << channels << " channel_map="sv; std::for_each_n(mapping, channels - 1, [&ss](std::uint8_t pos) { ss << pa_channel_position_to_string(position_mapping[pos]) << ','; }); @@ -54,12 +54,12 @@ namespace platf { util::safe_ptr mic; capture_e - sample(std::vector &sample_buf) override { + sample(std::vector &sample_buf) override { auto sample_size = sample_buf.size(); auto buf = sample_buf.data(); int status; - if (pa_simple_read(mic.get(), buf, sample_size * 2, &status)) { + if (pa_simple_read(mic.get(), buf, sample_size * sizeof(float), &status)) { BOOST_LOG(error) << "pa_simple_read() failed: "sv << pa_strerror(status); return capture_e::error; @@ -73,7 +73,7 @@ namespace platf { microphone(const std::uint8_t *mapping, int channels, std::uint32_t sample_rate, std::uint32_t frame_size, std::string source_name) { auto mic = std::make_unique(); - pa_sample_spec ss { PA_SAMPLE_S16LE, sample_rate, (std::uint8_t) channels }; + pa_sample_spec ss { PA_SAMPLE_FLOAT32, sample_rate, (std::uint8_t) channels }; pa_channel_map pa_map; pa_map.channels = channels; @@ -82,7 +82,8 @@ namespace platf { }); pa_buffer_attr pa_attr = {}; - pa_attr.maxlength = frame_size * 8; + pa_attr.fragsize = frame_size * channels * sizeof(float); + pa_attr.maxlength = pa_attr.fragsize * 2; int status; diff --git a/src/platform/macos/av_audio.h b/src/platform/macos/av_audio.h index e3c169eb4be..bf5b13b3d29 100644 --- a/src/platform/macos/av_audio.h +++ b/src/platform/macos/av_audio.h @@ -8,7 +8,7 @@ #include "third-party/TPCircularBuffer/TPCircularBuffer.h" -#define kBufferLength 2048 +#define kBufferLength 4096 @interface AVAudio: NSObject { @public diff --git a/src/platform/macos/av_audio.m b/src/platform/macos/av_audio.m index 7958abce2fa..f0e631ef311 100644 --- a/src/platform/macos/av_audio.m +++ b/src/platform/macos/av_audio.m @@ -87,8 +87,8 @@ - (int)setupMicrophone:(AVCaptureDevice *)device sampleRate:(UInt32)sampleRate f (NSString *) AVFormatIDKey: [NSNumber numberWithUnsignedInt:kAudioFormatLinearPCM], (NSString *) AVSampleRateKey: [NSNumber numberWithUnsignedInt:sampleRate], (NSString *) AVNumberOfChannelsKey: [NSNumber numberWithUnsignedInt:channels], - (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:16], - (NSString *) AVLinearPCMIsFloatKey: @NO, + (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:32], + (NSString *) AVLinearPCMIsFloatKey: @YES, (NSString *) AVLinearPCMIsNonInterleaved: @NO }]; diff --git a/src/platform/macos/microphone.mm b/src/platform/macos/microphone.mm index f82db075b13..1e3a4cd65ed 100644 --- a/src/platform/macos/microphone.mm +++ b/src/platform/macos/microphone.mm @@ -19,23 +19,23 @@ } capture_e - sample(std::vector &sample_in) override { + sample(std::vector &sample_in) override { auto sample_size = sample_in.size(); uint32_t length = 0; void *byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length); - while (length < sample_size * sizeof(std::int16_t)) { + while (length < sample_size * sizeof(float)) { [av_audio_capture.samplesArrivedSignal wait]; byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length); } - const int16_t *sampleBuffer = (int16_t *) byteSampleBuffer; - std::vector vectorBuffer(sampleBuffer, sampleBuffer + sample_size); + const float *sampleBuffer = (float *) byteSampleBuffer; + std::vector vectorBuffer(sampleBuffer, sampleBuffer + sample_size); std::copy_n(std::begin(vectorBuffer), sample_size, std::begin(sample_in)); - TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(std::int16_t)); + TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(float)); return capture_e::ok; } diff --git a/src/platform/windows/audio.cpp b/src/platform/windows/audio.cpp index bdcd66aba87..2101516f6a5 100644 --- a/src/platform/windows/audio.cpp +++ b/src/platform/windows/audio.cpp @@ -152,14 +152,14 @@ namespace platf::audio { wave_format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; wave_format.Format.nChannels = format.channels; wave_format.Format.nSamplesPerSec = SAMPLE_RATE; - wave_format.Format.wBitsPerSample = 16; + wave_format.Format.wBitsPerSample = 32; wave_format.Format.nBlockAlign = wave_format.Format.nChannels * wave_format.Format.wBitsPerSample / 8; wave_format.Format.nAvgBytesPerSec = wave_format.Format.nSamplesPerSec * wave_format.Format.nBlockAlign; wave_format.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); - wave_format.Samples.wValidBitsPerSample = 16; + wave_format.Samples.wValidBitsPerSample = 32; wave_format.dwChannelMask = format.channel_mask; - wave_format.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + wave_format.SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT; return wave_format; } @@ -167,7 +167,7 @@ namespace platf::audio { int set_wave_format(audio::wave_format_t &wave_format, const format_t &format) { wave_format->nSamplesPerSec = SAMPLE_RATE; - wave_format->wBitsPerSample = 16; + wave_format->wBitsPerSample = 32; switch (wave_format->wFormatTag) { case WAVE_FORMAT_PCM: @@ -176,9 +176,9 @@ namespace platf::audio { break; case WAVE_FORMAT_EXTENSIBLE: { auto wave_ex = (PWAVEFORMATEXTENSIBLE) wave_format.get(); - wave_ex->Samples.wValidBitsPerSample = 16; + wave_ex->Samples.wValidBitsPerSample = 32; wave_ex->dwChannelMask = format.channel_mask; - wave_ex->SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + wave_ex->SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT; break; } default: @@ -341,7 +341,7 @@ namespace platf::audio { class mic_wasapi_t: public mic_t { public: capture_e - sample(std::vector &sample_out) override { + sample(std::vector &sample_out) override { auto sample_size = sample_out.size(); // Refill the sample buffer if needed @@ -432,7 +432,7 @@ namespace platf::audio { } // *2 --> needs to fit double - sample_buf = util::buffer_t { std::max(frames, frame_size) * 2 * channels_out }; + sample_buf = util::buffer_t { std::max(frames, frame_size) * 2 * channels_out }; sample_buf_pos = std::begin(sample_buf); status = audio_client->GetService(IID_IAudioCaptureClient, (void **) &audio_capture); @@ -489,7 +489,7 @@ namespace platf::audio { // Total number of samples struct sample_aligned_t { std::uint32_t uninitialized; - std::int16_t *samples; + float *samples; } sample_aligned; // number of samples / number of channels @@ -588,8 +588,8 @@ namespace platf::audio { REFERENCE_TIME default_latency_ms; - util::buffer_t sample_buf; - std::int16_t *sample_buf_pos; + util::buffer_t sample_buf; + float *sample_buf_pos; int channels; HANDLE mmcss_task_handle = NULL;