Migrate audio pipeline to float from 16-bit integer (LizardByte#2873)

Co-authored-by: Cameron Gutman <[email protected]>
KuleRucket · Oct 9, 2024 · 7be0f49 · 7be0f49
1 parent 3a6ebb7
commit 7be0f49
Show file tree

Hide file tree

Showing 9 changed files with 433 additions and 296 deletions.
diff --git a/src/audio.cpp b/src/audio.cpp
@@ -18,7 +18,7 @@
 namespace audio {
   using namespace std::literals;
   using opus_t = util::safe_ptr<OpusMSEncoder, opus_multistream_encoder_destroy>;
-  using sample_queue_t = std::shared_ptr<safe::queue_t<std::vector<std::int16_t>>>;
+  using sample_queue_t = std::shared_ptr<safe::queue_t<std::vector<float>>>;
 
   struct audio_ctx_t {
     // We want to change the sink for the first stream only
@@ -128,7 +128,7 @@ namespace audio {
     while (auto sample = samples->pop()) {
       buffer_t packet { 1400 };
 
-      int bytes = opus_multistream_encode(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size());
+      int bytes = opus_multistream_encode_float(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size());
       if (bytes < 0) {
         BOOST_LOG(error) << "Couldn't encode audio: "sv << opus_strerror(bytes);
         packets->stop();
@@ -228,7 +228,7 @@ namespace audio {
     int samples_per_frame = frame_size * stream.channelCount;
 
     while (!shutdown_event->peek()) {
-      std::vector<std::int16_t> sample_buffer;
+      std::vector<float> sample_buffer;
       sample_buffer.resize(samples_per_frame);
 
       auto status = mic->sample(sample_buffer);

diff --git a/src/logging.cpp b/src/logging.cpp
@@ -169,4 +169,25 @@ namespace logging {
       << "        -p | Enable/Disable UPnP"sv << std::endl
       << std::endl;
   }
+
+  std::string
+  bracket(const std::string_view &input) {
+    return '[' + std::string(input) + ']';
+  }
+
+  std::string
+  bracket(const std::string &input) {
+    return bracket(static_cast<std::string_view>(input));
+  }
+
+  std::wstring
+  bracket(const std::wstring_view &input) {
+    return L'[' + std::wstring(input) + L']';
+  }
+
+  std::wstring
+  bracket(const std::wstring &input) {
+    return bracket(static_cast<std::wstring_view>(input));
+  }
+
 }  // namespace logging
diff --git a/src/logging.h b/src/logging.h
@@ -204,4 +204,36 @@ namespace logging {
     min_max_avg_periodic_logger<double> logger;
   };
 
+  /**
+   * @brief Enclose a string in square brackets, e.g. `abc` becomes `[abc]`.
+   * @param input Text to enclose; it is not modified.
+   * @return New string made of `[`, the input text and `]`.
+   */
+  std::string
+  bracket(const std::string &input);
+
+  /**
+   * @brief Enclose a string view in square brackets, e.g. `abc` becomes `[abc]`.
+   * @param input View of the text to enclose; its characters are copied.
+   * @return New string made of `[`, the viewed text and `]`.
+   */
+  std::string
+  bracket(const std::string_view &input);
+
+  /**
+   * @brief Enclose a wide string in square brackets, e.g. `abc` becomes `[abc]`.
+   * @param input Wide text to enclose; it is not modified.
+   * @return New wide string made of `[`, the input text and `]`.
+   */
+  std::wstring
+  bracket(const std::wstring &input);
+
+  /**
+   * @brief Enclose a wide string view in square brackets, e.g. `abc` becomes `[abc]`.
+   * @param input View of the wide text to enclose; its characters are copied.
+   * @return New wide string made of `[`, the viewed text and `]`.
+   */
+  std::wstring
+  bracket(const std::wstring_view &input);
+
 }  // namespace logging
diff --git a/src/platform/common.h b/src/platform/common.h
@@ -525,7 +525,12 @@ namespace platf {
   class mic_t {
   public:
+    /**
+     * @brief Capture audio into the supplied buffer.
+     * @param frame_buffer Buffer that receives float samples; the caller sizes it beforehand.
+     * @return Status of the capture.
+     */
     virtual capture_e
-    sample(std::vector<std::int16_t> &frame_buffer) = 0;
+    sample(std::vector<float> &frame_buffer) = 0;
 
     virtual ~mic_t() = default;
   };

diff --git a/src/platform/linux/audio.cpp b/src/platform/linux/audio.cpp
@@ -36,7 +36,7 @@ namespace platf {
   to_string(const char *name, const std::uint8_t *mapping, int channels) {
     std::stringstream ss;
 
-    ss << "rate=48000 sink_name="sv << name << " format=s16le channels="sv << channels << " channel_map="sv;
+    ss << "rate=48000 sink_name="sv << name << " format=float channels="sv << channels << " channel_map="sv;
     std::for_each_n(mapping, channels - 1, [&ss](std::uint8_t pos) {
       ss << pa_channel_position_to_string(position_mapping[pos]) << ',';
     });
@@ -54,12 +54,12 @@ namespace platf {
     util::safe_ptr<pa_simple, pa_simple_free> mic;
 
     capture_e
-    sample(std::vector<std::int16_t> &sample_buf) override {
+    sample(std::vector<float> &sample_buf) override {
       auto sample_size = sample_buf.size();
 
       auto buf = sample_buf.data();
       int status;
-      if (pa_simple_read(mic.get(), buf, sample_size * 2, &status)) {
+      if (pa_simple_read(mic.get(), buf, sample_size * sizeof(float), &status)) {
         BOOST_LOG(error) << "pa_simple_read() failed: "sv << pa_strerror(status);
 
         return capture_e::error;
@@ -73,7 +73,7 @@ namespace platf {
   microphone(const std::uint8_t *mapping, int channels, std::uint32_t sample_rate, std::uint32_t frame_size, std::string source_name) {
     auto mic = std::make_unique<mic_attr_t>();
 
-    pa_sample_spec ss { PA_SAMPLE_S16LE, sample_rate, (std::uint8_t) channels };
+    pa_sample_spec ss { PA_SAMPLE_FLOAT32, sample_rate, (std::uint8_t) channels };
     pa_channel_map pa_map;
 
     pa_map.channels = channels;
@@ -82,7 +82,8 @@ namespace platf {
     });
 
     pa_buffer_attr pa_attr = {};
-    pa_attr.maxlength = frame_size * 8;
+    pa_attr.fragsize = frame_size * channels * sizeof(float);
+    pa_attr.maxlength = pa_attr.fragsize * 2;
 
     int status;
 

diff --git a/src/platform/macos/av_audio.h b/src/platform/macos/av_audio.h
@@ -8,7 +8,7 @@
 
 #include "third-party/TPCircularBuffer/TPCircularBuffer.h"
 
-#define kBufferLength 2048
+#define kBufferLength 4096
 
 @interface AVAudio: NSObject <AVCaptureAudioDataOutputSampleBufferDelegate> {
 @public

diff --git a/src/platform/macos/av_audio.m b/src/platform/macos/av_audio.m
@@ -87,8 +87,8 @@ - (int)setupMicrophone:(AVCaptureDevice *)device sampleRate:(UInt32)sampleRate f
     (NSString *) AVFormatIDKey: [NSNumber numberWithUnsignedInt:kAudioFormatLinearPCM],
     (NSString *) AVSampleRateKey: [NSNumber numberWithUnsignedInt:sampleRate],
     (NSString *) AVNumberOfChannelsKey: [NSNumber numberWithUnsignedInt:channels],
-    (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:16],
-    (NSString *) AVLinearPCMIsFloatKey: @NO,
+    (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:32],
+    (NSString *) AVLinearPCMIsFloatKey: @YES,
     (NSString *) AVLinearPCMIsNonInterleaved: @NO
   }];
 

diff --git a/src/platform/macos/microphone.mm b/src/platform/macos/microphone.mm
@@ -19,23 +19,29 @@
     }
 
+    /**
+     * @brief Wait until enough audio is buffered, then copy it into `sample_in`.
+     * @param sample_in Destination buffer; `sample_in.size()` floats are read into it.
+     * @return `capture_e::ok` after the samples have been copied and consumed.
+     */
     capture_e
-    sample(std::vector<std::int16_t> &sample_in) override {
+    sample(std::vector<float> &sample_in) override {
       auto sample_size = sample_in.size();
 
       uint32_t length = 0;
       void *byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length);
 
-      while (length < sample_size * sizeof(std::int16_t)) {
+      // Keep waiting until the circular buffer holds at least `sample_size` floats.
+      while (length < sample_size * sizeof(float)) {
         [av_audio_capture.samplesArrivedSignal wait];
         byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length);
       }
 
-      const int16_t *sampleBuffer = (int16_t *) byteSampleBuffer;
-      std::vector<int16_t> vectorBuffer(sampleBuffer, sampleBuffer + sample_size);
-
-      std::copy_n(std::begin(vectorBuffer), sample_size, std::begin(sample_in));
+      // Copy straight out of the circular buffer: staging the samples in a temporary
+      // vector would add a heap allocation and a second copy on every call.
+      const float *sampleBuffer = static_cast<const float *>(byteSampleBuffer);
+      std::copy_n(sampleBuffer, sample_size, std::begin(sample_in));
 
-      TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(std::int16_t));
+      TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(float));
 
       return capture_e::ok;
     }