From 7be0f49ad10b0ccfb15140697547762f9fd8f448 Mon Sep 17 00:00:00 2001
From: ns6089 <61738816+ns6089@users.noreply.github.com>
Date: Fri, 26 Jul 2024 04:01:43 +0300
Subject: [PATCH] Migrate audio pipeline to float from 16-bit integer (#2873)

Co-authored-by: Cameron Gutman <aicommander@gmail.com>
---
 src/audio.cpp                    |   6 +-
 src/logging.cpp                  |  21 +
 src/logging.h                    |  32 ++
 src/platform/common.h            |   2 +-
 src/platform/linux/audio.cpp     |  11 +-
 src/platform/macos/av_audio.h    |   2 +-
 src/platform/macos/av_audio.m    |   4 +-
 src/platform/macos/microphone.mm |  10 +-
 src/platform/windows/audio.cpp   | 641 +++++++++++++++++--------------
 9 files changed, 433 insertions(+), 296 deletions(-)
diff --git a/src/audio.cpp b/src/audio.cpp
index ac1947fec74..b24ae61350f 100644
--- a/src/audio.cpp
+++ b/src/audio.cpp
@@ -18,7 +18,7 @@
 namespace audio {
   using namespace std::literals;
   using opus_t = util::safe_ptr<OpusMSEncoder, opus_multistream_encoder_destroy>;
-  using sample_queue_t = std::shared_ptr<safe::queue_t<std::vector<std::int16_t>>>;
+  using sample_queue_t = std::shared_ptr<safe::queue_t<std::vector<float>>>;
 
   struct audio_ctx_t {
     // We want to change the sink for the first stream only
@@ -128,7 +128,7 @@ namespace audio {
     while (auto sample = samples->pop()) {
       buffer_t packet { 1400 };
 
-      int bytes = opus_multistream_encode(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size());
+      int bytes = opus_multistream_encode_float(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size());
       if (bytes < 0) {
         BOOST_LOG(error) << "Couldn't encode audio: "sv << opus_strerror(bytes);
         packets->stop();
@@ -228,7 +228,7 @@ namespace audio {
     int samples_per_frame = frame_size * stream.channelCount;
 
     while (!shutdown_event->peek()) {
-      std::vector<std::int16_t> sample_buffer;
+      std::vector<float> sample_buffer;
       sample_buffer.resize(samples_per_frame);
 
       auto status = mic->sample(sample_buffer);
diff --git a/src/logging.cpp b/src/logging.cpp
index c96ee6a961a..82ca850c80a 100644
--- a/src/logging.cpp
+++ b/src/logging.cpp
@@ -169,4 +169,25 @@ namespace logging {
       << "        -p | Enable/Disable UPnP"sv << std::endl
       << std::endl;
   }
+
+  std::string
+  bracket(const std::string &input) {
+    return bracket(std::string_view(input));
+  }
+
+  std::string
+  bracket(const std::string_view &input) {
+    return "["s + std::string(input) + "]"s;
+  }
+
+  std::wstring
+  bracket(const std::wstring &input) {
+    return bracket(std::wstring_view(input));
+  }
+
+  std::wstring
+  bracket(const std::wstring_view &input) {
+    return L"["s + std::wstring(input) + L"]"s;
+  }
+
 }  // namespace logging
diff --git a/src/logging.h b/src/logging.h
index 62600ff424c..6fe4645fc92 100644
--- a/src/logging.h
+++ b/src/logging.h
@@ -204,4 +204,36 @@ namespace logging {
     min_max_avg_periodic_logger<double> logger;
   };
 
+  /**
+   * @brief Enclose string in square brackets.
+   * @param input Input string.
+   * @return Enclosed string.
+   */
+  std::string
+  bracket(const std::string &input);
+
+  /**
+   * @brief Enclose string in square brackets.
+   * @param input Input string.
+   * @return Enclosed string.
+   */
+  std::string
+  bracket(const std::string_view &input);
+
+  /**
+   * @brief Enclose string in square brackets.
+   * @param input Input string.
+   * @return Enclosed string.
+   */
+  std::wstring
+  bracket(const std::wstring &input);
+
+  /**
+   * @brief Enclose string in square brackets.
+   * @param input Input string.
+   * @return Enclosed string.
+   */
+  std::wstring
+  bracket(const std::wstring_view &input);
+
 }  // namespace logging
diff --git a/src/platform/common.h b/src/platform/common.h
index 755e604aa0c..24cd658b04d 100644
--- a/src/platform/common.h
+++ b/src/platform/common.h
@@ -525,7 +525,7 @@ namespace platf {
   class mic_t {
   public:
     virtual capture_e
-    sample(std::vector<std::int16_t> &frame_buffer) = 0;
+    sample(std::vector<float> &frame_buffer) = 0;
 
     virtual ~mic_t() = default;
   };
diff --git a/src/platform/linux/audio.cpp b/src/platform/linux/audio.cpp
index f742accbeec..32aa36a61b5 100644
--- a/src/platform/linux/audio.cpp
+++ b/src/platform/linux/audio.cpp
@@ -36,7 +36,7 @@ namespace platf {
   to_string(const char *name, const std::uint8_t *mapping, int channels) {
     std::stringstream ss;
 
-    ss << "rate=48000 sink_name="sv << name << " format=s16le channels="sv << channels << " channel_map="sv;
+    ss << "rate=48000 sink_name="sv << name << " format=float channels="sv << channels << " channel_map="sv;
     std::for_each_n(mapping, channels - 1, [&ss](std::uint8_t pos) {
       ss << pa_channel_position_to_string(position_mapping[pos]) << ',';
     });
@@ -54,12 +54,12 @@ namespace platf {
     util::safe_ptr<pa_simple, pa_simple_free> mic;
 
     capture_e
-    sample(std::vector<std::int16_t> &sample_buf) override {
+    sample(std::vector<float> &sample_buf) override {
       auto sample_size = sample_buf.size();
 
       auto buf = sample_buf.data();
       int status;
-      if (pa_simple_read(mic.get(), buf, sample_size * 2, &status)) {
+      if (pa_simple_read(mic.get(), buf, sample_size * sizeof(float), &status)) {
         BOOST_LOG(error) << "pa_simple_read() failed: "sv << pa_strerror(status);
 
         return capture_e::error;
@@ -73,7 +73,7 @@ namespace platf {
   microphone(const std::uint8_t *mapping, int channels, std::uint32_t sample_rate, std::uint32_t frame_size, std::string source_name) {
     auto mic = std::make_unique<mic_attr_t>();
 
-    pa_sample_spec ss { PA_SAMPLE_S16LE, sample_rate, (std::uint8_t) channels };
+    pa_sample_spec ss { PA_SAMPLE_FLOAT32, sample_rate, (std::uint8_t) channels };
     pa_channel_map pa_map;
 
     pa_map.channels = channels;
@@ -82,7 +82,8 @@ namespace platf {
     });
 
     pa_buffer_attr pa_attr = {};
-    pa_attr.maxlength = frame_size * 8;
+    pa_attr.fragsize = frame_size * channels * sizeof(float);
+    pa_attr.maxlength = pa_attr.fragsize * 2;
 
     int status;
 
diff --git a/src/platform/macos/av_audio.h b/src/platform/macos/av_audio.h
index e3c169eb4be..bf5b13b3d29 100644
--- a/src/platform/macos/av_audio.h
+++ b/src/platform/macos/av_audio.h
@@ -8,7 +8,7 @@
 
 #include "third-party/TPCircularBuffer/TPCircularBuffer.h"
 
-#define kBufferLength 2048
+#define kBufferLength 4096
 
 @interface AVAudio: NSObject <AVCaptureAudioDataOutputSampleBufferDelegate> {
 @public
diff --git a/src/platform/macos/av_audio.m b/src/platform/macos/av_audio.m
index 7958abce2fa..f0e631ef311 100644
--- a/src/platform/macos/av_audio.m
+++ b/src/platform/macos/av_audio.m
@@ -87,8 +87,8 @@ - (int)setupMicrophone:(AVCaptureDevice *)device sampleRate:(UInt32)sampleRate f
     (NSString *) AVFormatIDKey: [NSNumber numberWithUnsignedInt:kAudioFormatLinearPCM],
     (NSString *) AVSampleRateKey: [NSNumber numberWithUnsignedInt:sampleRate],
     (NSString *) AVNumberOfChannelsKey: [NSNumber numberWithUnsignedInt:channels],
-    (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:16],
-    (NSString *) AVLinearPCMIsFloatKey: @NO,
+    (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:32],
+    (NSString *) AVLinearPCMIsFloatKey: @YES,
     (NSString *) AVLinearPCMIsNonInterleaved: @NO
   }];
 
diff --git a/src/platform/macos/microphone.mm b/src/platform/macos/microphone.mm
index f82db075b13..1e3a4cd65ed 100644
--- a/src/platform/macos/microphone.mm
+++ b/src/platform/macos/microphone.mm
@@ -19,23 +19,23 @@
     }
 
     capture_e
-    sample(std::vector<std::int16_t> &sample_in) override {
+    sample(std::vector<float> &sample_in) override {
       auto sample_size = sample_in.size();
 
       uint32_t length = 0;
       void *byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length);
 
-      while (length < sample_size * sizeof(std::int16_t)) {
+      while (length < sample_size * sizeof(float)) {
         [av_audio_capture.samplesArrivedSignal wait];
         byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length);
       }
 
-      const int16_t *sampleBuffer = (int16_t *) byteSampleBuffer;
-      std::vector<int16_t> vectorBuffer(sampleBuffer, sampleBuffer + sample_size);
+      const float *sampleBuffer = (float *) byteSampleBuffer;
+      std::vector<float> vectorBuffer(sampleBuffer, sampleBuffer + sample_size);
 
       std::copy_n(std::begin(vectorBuffer), sample_size, std::begin(sample_in));
 
-      TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(std::int16_t));
+      TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(float));
 
       return capture_e::ok;
     }
diff --git a/src/platform/windows/audio.cpp b/src/platform/windows/audio.cpp
index bdcd66aba87..bc208128bc7 100644
--- a/src/platform/windows/audio.cpp
+++ b/src/platform/windows/audio.cpp
@@ -36,12 +36,168 @@ DEFINE_PROPERTYKEY(PKEY_DeviceInterface_FriendlyName, 0x026e516e, 0xb814, 0x414b
   #warning No known Steam audio driver for this architecture
 #endif
 
-using namespace std::literals;
-namespace platf::audio {
-  constexpr auto SAMPLE_RATE = 48000;
+namespace {
 
+  constexpr auto SAMPLE_RATE = 48000;
   constexpr auto STEAM_AUDIO_DRIVER_PATH = L"%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\" STEAM_DRIVER_SUBDIR L"\\SteamStreamingSpeakers.inf";
 
+  constexpr auto waveformat_mask_stereo = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT;
+
+  constexpr auto waveformat_mask_surround51_with_backspeakers = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT |
+                                                                SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY |
+                                                                SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT;
+
+  constexpr auto waveformat_mask_surround51_with_sidespeakers = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT |
+                                                                SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY |
+                                                                SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT;
+
+  constexpr auto waveformat_mask_surround71 = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT |
+                                              SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY |
+                                              SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT |
+                                              SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT;
+
+  enum class sample_format_e {
+    f32,
+    s32,
+    s24in32,
+    s24,
+    s16,
+    _size,
+  };
+
+  constexpr WAVEFORMATEXTENSIBLE
+  create_waveformat(sample_format_e sample_format, WORD channel_count, DWORD channel_mask) {
+    WAVEFORMATEXTENSIBLE waveformat = {};
+
+    switch (sample_format) {
+      default:
+      case sample_format_e::f32:
+        waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT;
+        waveformat.Format.wBitsPerSample = 32;
+        waveformat.Samples.wValidBitsPerSample = 32;
+        break;
+
+      case sample_format_e::s32:
+        waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
+        waveformat.Format.wBitsPerSample = 32;
+        waveformat.Samples.wValidBitsPerSample = 32;
+        break;
+
+      case sample_format_e::s24in32:
+        waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
+        waveformat.Format.wBitsPerSample = 32;
+        waveformat.Samples.wValidBitsPerSample = 24;
+        break;
+
+      case sample_format_e::s24:
+        waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
+        waveformat.Format.wBitsPerSample = 24;
+        waveformat.Samples.wValidBitsPerSample = 24;
+        break;
+
+      case sample_format_e::s16:
+        waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
+        waveformat.Format.wBitsPerSample = 16;
+        waveformat.Samples.wValidBitsPerSample = 16;
+        break;
+    }
+
+    static_assert((int) sample_format_e::_size == 5, "Unrecognized sample_format_e");
+
+    waveformat.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
+    waveformat.Format.nChannels = channel_count;
+    waveformat.Format.nSamplesPerSec = SAMPLE_RATE;
+
+    waveformat.Format.nBlockAlign = waveformat.Format.nChannels * waveformat.Format.wBitsPerSample / 8;
+    waveformat.Format.nAvgBytesPerSec = waveformat.Format.nSamplesPerSec * waveformat.Format.nBlockAlign;
+    waveformat.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
+
+    waveformat.dwChannelMask = channel_mask;
+
+    return waveformat;
+  }
+
+  using virtual_sink_waveformats_t = std::vector<WAVEFORMATEXTENSIBLE>;
+
+  template <WORD channel_count>
+  virtual_sink_waveformats_t
+  create_virtual_sink_waveformats() {
+    if constexpr (channel_count == 2) {
+      auto channel_mask = waveformat_mask_stereo;
+      return {
+        create_waveformat(sample_format_e::f32, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s32, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s24in32, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s24, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s16, channel_count, channel_mask),
+      };
+    }
+    else if (channel_count == 6) {
+      auto channel_mask1 = waveformat_mask_surround51_with_backspeakers;
+      auto channel_mask2 = waveformat_mask_surround51_with_sidespeakers;
+      return {
+        create_waveformat(sample_format_e::f32, channel_count, channel_mask1),
+        create_waveformat(sample_format_e::f32, channel_count, channel_mask2),
+        create_waveformat(sample_format_e::s32, channel_count, channel_mask1),
+        create_waveformat(sample_format_e::s32, channel_count, channel_mask2),
+        create_waveformat(sample_format_e::s24in32, channel_count, channel_mask1),
+        create_waveformat(sample_format_e::s24in32, channel_count, channel_mask2),
+        create_waveformat(sample_format_e::s24, channel_count, channel_mask1),
+        create_waveformat(sample_format_e::s24, channel_count, channel_mask2),
+        create_waveformat(sample_format_e::s16, channel_count, channel_mask1),
+        create_waveformat(sample_format_e::s16, channel_count, channel_mask2),
+      };
+    }
+    else if (channel_count == 8) {
+      auto channel_mask = waveformat_mask_surround71;
+      return {
+        create_waveformat(sample_format_e::f32, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s32, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s24in32, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s24, channel_count, channel_mask),
+        create_waveformat(sample_format_e::s16, channel_count, channel_mask),
+      };
+    }
+  }
+
+  std::string
+  waveformat_to_pretty_string(const WAVEFORMATEXTENSIBLE &waveformat) {
+    std::string result = waveformat.SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ? "F" :
+                         waveformat.SubFormat == KSDATAFORMAT_SUBTYPE_PCM        ? "S" :
+                                                                                   "UNKNOWN";
+
+    result += std::to_string(waveformat.Samples.wValidBitsPerSample) + " " +
+              std::to_string(waveformat.Format.nSamplesPerSec) + " ";
+
+    switch (waveformat.dwChannelMask) {
+      case (waveformat_mask_stereo):
+        result += "2.0";
+        break;
+
+      case (waveformat_mask_surround51_with_backspeakers):
+        result += "5.1";
+        break;
+
+      case (waveformat_mask_surround51_with_sidespeakers):
+        result += "5.1 (sidespeakers)";
+        break;
+
+      case (waveformat_mask_surround71):
+        result += "7.1";
+        break;
+
+      default:
+        result += std::to_string(waveformat.Format.nChannels) + " channels (unrecognized)";
+        break;
+    }
+
+    return result;
+  }
+
+}  // namespace
+
+using namespace std::literals;
+namespace platf::audio {
   template <class T>
   void
   Release(T *p) {
@@ -90,109 +246,33 @@ namespace platf::audio {
   };
 
   struct format_t {
-    enum type_e : int {
-      none,  ///< No format
-      stereo,  ///< Stereo
-      surr51,  ///< Surround 5.1
-      surr71,  ///< Surround 7.1
-    } type;
-
-    std::string_view name;
-    int channels;
-    int channel_mask;
-  } formats[] {
-    {
-      format_t::stereo,
-      "Stereo"sv,
+    WORD channel_count;
+    std::string name;
+    int capture_waveformat_channel_mask;
+    virtual_sink_waveformats_t virtual_sink_waveformats;
+  };
+
+  const std::array<const format_t, 3> formats = {
+    format_t {
       2,
-      SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT,
+      "Stereo",
+      waveformat_mask_stereo,
+      create_virtual_sink_waveformats<2>(),
     },
-    {
-      format_t::surr51,
-      "Surround 5.1"sv,
+    format_t {
       6,
-      SPEAKER_FRONT_LEFT |
-        SPEAKER_FRONT_RIGHT |
-        SPEAKER_FRONT_CENTER |
-        SPEAKER_LOW_FREQUENCY |
-        SPEAKER_BACK_LEFT |
-        SPEAKER_BACK_RIGHT,
+      "Surround 5.1",
+      waveformat_mask_surround51_with_backspeakers,
+      create_virtual_sink_waveformats<6>(),
     },
-    {
-      format_t::surr71,
-      "Surround 7.1"sv,
+    format_t {
       8,
-      SPEAKER_FRONT_LEFT |
-        SPEAKER_FRONT_RIGHT |
-        SPEAKER_FRONT_CENTER |
-        SPEAKER_LOW_FREQUENCY |
-        SPEAKER_BACK_LEFT |
-        SPEAKER_BACK_RIGHT |
-        SPEAKER_SIDE_LEFT |
-        SPEAKER_SIDE_RIGHT,
+      "Surround 7.1",
+      waveformat_mask_surround71,
+      create_virtual_sink_waveformats<8>(),
     },
   };
 
-  static format_t surround_51_side_speakers {
-    format_t::surr51,
-    "Surround 5.1"sv,
-    6,
-    SPEAKER_FRONT_LEFT |
-      SPEAKER_FRONT_RIGHT |
-      SPEAKER_FRONT_CENTER |
-      SPEAKER_LOW_FREQUENCY |
-      SPEAKER_SIDE_LEFT |
-      SPEAKER_SIDE_RIGHT,
-  };
-
-  WAVEFORMATEXTENSIBLE
-  create_wave_format(const format_t &format) {
-    WAVEFORMATEXTENSIBLE wave_format;
-
-    wave_format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE;
-    wave_format.Format.nChannels = format.channels;
-    wave_format.Format.nSamplesPerSec = SAMPLE_RATE;
-    wave_format.Format.wBitsPerSample = 16;
-    wave_format.Format.nBlockAlign = wave_format.Format.nChannels * wave_format.Format.wBitsPerSample / 8;
-    wave_format.Format.nAvgBytesPerSec = wave_format.Format.nSamplesPerSec * wave_format.Format.nBlockAlign;
-    wave_format.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX);
-
-    wave_format.Samples.wValidBitsPerSample = 16;
-    wave_format.dwChannelMask = format.channel_mask;
-    wave_format.SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
-
-    return wave_format;
-  }
-
-  int
-  set_wave_format(audio::wave_format_t &wave_format, const format_t &format) {
-    wave_format->nSamplesPerSec = SAMPLE_RATE;
-    wave_format->wBitsPerSample = 16;
-
-    switch (wave_format->wFormatTag) {
-      case WAVE_FORMAT_PCM:
-        break;
-      case WAVE_FORMAT_IEEE_FLOAT:
-        break;
-      case WAVE_FORMAT_EXTENSIBLE: {
-        auto wave_ex = (PWAVEFORMATEXTENSIBLE) wave_format.get();
-        wave_ex->Samples.wValidBitsPerSample = 16;
-        wave_ex->dwChannelMask = format.channel_mask;
-        wave_ex->SubFormat = KSDATAFORMAT_SUBTYPE_PCM;
-        break;
-      }
-      default:
-        BOOST_LOG(error) << "Unsupported Wave Format: [0x"sv << util::hex(wave_format->wFormatTag).to_string_view() << ']';
-        return -1;
-    };
-
-    wave_format->nChannels = format.channels;
-    wave_format->nBlockAlign = wave_format->nChannels * wave_format->wBitsPerSample / 8;
-    wave_format->nAvgBytesPerSec = wave_format->nSamplesPerSec * wave_format->nBlockAlign;
-
-    return 0;
-  }
-
   audio_client_t
   make_audio_client(device_t &device, const format_t &format) {
     audio_client_t audio_client;
@@ -208,45 +288,42 @@ namespace platf::audio {
       return nullptr;
     }
 
-    WAVEFORMATEXTENSIBLE wave_format = create_wave_format(format);
+    WAVEFORMATEXTENSIBLE capture_waveformat =
+      create_waveformat(sample_format_e::f32, format.channel_count, format.capture_waveformat_channel_mask);
+
+    {
+      wave_format_t mixer_waveformat;
+      status = audio_client->GetMixFormat(&mixer_waveformat);
+      if (FAILED(status)) {
+        BOOST_LOG(error) << "Couldn't get mix format for audio device: [0x"sv << util::hex(status).to_string_view() << ']';
+        return nullptr;
+      }
+
+      // Prefer the native channel layout of captured audio device when channel counts match
+      if (mixer_waveformat->nChannels == format.channel_count &&
+          mixer_waveformat->wFormatTag == WAVE_FORMAT_EXTENSIBLE &&
+          mixer_waveformat->cbSize >= 22) {
+        auto waveformatext_pointer = reinterpret_cast<const WAVEFORMATEXTENSIBLE *>(mixer_waveformat.get());
+        capture_waveformat.dwChannelMask = waveformatext_pointer->dwChannelMask;
+      }
+    }
 
     status = audio_client->Initialize(
       AUDCLNT_SHAREMODE_SHARED,
       AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_EVENTCALLBACK |
         AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY,  // Enable automatic resampling to 48 KHz
       0, 0,
-      (LPWAVEFORMATEX) &wave_format,
+      (LPWAVEFORMATEX) &capture_waveformat,
       nullptr);
 
     if (status) {
-      BOOST_LOG(debug) << "Couldn't initialize audio client for ["sv << format.name << "]: [0x"sv << util::hex(status).to_string_view() << ']';
+      BOOST_LOG(error) << "Couldn't initialize audio client for ["sv << format.name << "]: [0x"sv << util::hex(status).to_string_view() << ']';
       return nullptr;
     }
 
-    return audio_client;
-  }
-
-  const wchar_t *
-  no_null(const wchar_t *str) {
-    return str ? str : L"Unknown";
-  }
-
-  bool
-  validate_device(device_t &device) {
-    bool valid = false;
-
-    // Check for any valid format
-    for (const auto &format : formats) {
-      auto audio_client = make_audio_client(device, format);
-
-      BOOST_LOG(debug) << format.name << ": "sv << (!audio_client ? "unsupported"sv : "supported"sv);
-
-      if (audio_client) {
-        valid = true;
-      }
-    }
+    BOOST_LOG(info) << "Audio capture format is " << logging::bracket(waveformat_to_pretty_string(capture_waveformat));
 
-    return valid;
+    return audio_client;
   }
 
   device_t
@@ -341,7 +418,7 @@ namespace platf::audio {
   class mic_wasapi_t: public mic_t {
   public:
     capture_e
-    sample(std::vector<std::int16_t> &sample_out) override {
+    sample(std::vector<float> &sample_out) override {
       auto sample_size = sample_out.size();
 
       // Refill the sample buffer if needed
@@ -398,9 +475,10 @@ namespace platf::audio {
         return -1;
       }
 
-      for (auto &format : formats) {
-        if (format.channels != channels_out) {
-          BOOST_LOG(debug) << "Skipping audio format ["sv << format.name << "] with channel count ["sv << format.channels << " != "sv << channels_out << ']';
+      for (const auto &format : formats) {
+        if (format.channel_count != channels_out) {
+          BOOST_LOG(debug) << "Skipping audio format ["sv << format.name << "] with channel count ["sv
+                           << format.channel_count << " != "sv << channels_out << ']';
           continue;
         }
 
@@ -432,7 +510,7 @@ namespace platf::audio {
       }
 
       // *2 --> needs to fit double
-      sample_buf = util::buffer_t<std::int16_t> { std::max(frames, frame_size) * 2 * channels_out };
+      sample_buf = util::buffer_t<float> { std::max(frames, frame_size) * 2 * channels_out };
       sample_buf_pos = std::begin(sample_buf);
 
       status = audio_client->GetService(IID_IAudioCaptureClient, (void **) &audio_capture);
@@ -489,7 +567,7 @@ namespace platf::audio {
       // Total number of samples
       struct sample_aligned_t {
         std::uint32_t uninitialized;
-        std::int16_t *samples;
+        float *samples;
       } sample_aligned;
 
       // number of samples / number of channels
@@ -588,8 +666,8 @@ namespace platf::audio {
 
     REFERENCE_TIME default_latency_ms;
 
-    util::buffer_t<std::int16_t> sample_buf;
-    std::int16_t *sample_buf_pos;
+    util::buffer_t<float> sample_buf;
+    float *sample_buf_pos;
     int channels;
 
     HANDLE mmcss_task_handle = NULL;
@@ -599,117 +677,77 @@ namespace platf::audio {
   public:
     std::optional<sink_t>
     sink_info() override {
-      auto virtual_adapter_name = L"Steam Streaming Speakers"sv;
-
       sink_t sink;
 
-      auto device = default_device(device_enum);
-      if (!device) {
-        return std::nullopt;
-      }
-
-      audio::wstring_t wstring;
-      device->GetId(&wstring);
-
-      sink.host = to_utf8(wstring.get());
-
-      collection_t collection;
-      auto status = device_enum->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &collection);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Couldn't enumerate: [0x"sv << util::hex(status).to_string_view() << ']';
-
-        return std::nullopt;
-      }
-
-      UINT count;
-      collection->GetCount(&count);
-
-      // If the sink isn't a device name, we'll assume it's a device ID
-      auto virtual_device_id = find_device_id_by_name(config::audio.virtual_sink).value_or(from_utf8(config::audio.virtual_sink));
-      auto virtual_device_found = false;
-
-      for (auto x = 0; x < count; ++x) {
-        audio::device_t device;
-        collection->Item(x, &device);
-
-        if (!validate_device(device)) {
-          continue;
+      // Fill host sink name with the device_id of the current default audio device.
+      {
+        auto device = default_device(device_enum);
+        if (!device) {
+          return std::nullopt;
         }
 
-        audio::wstring_t wstring;
-        device->GetId(&wstring);
-        std::wstring device_id { wstring.get() };
-
-        audio::prop_t prop;
-        device->OpenPropertyStore(STGM_READ, &prop);
-
-        prop_var_t adapter_friendly_name;
-        prop_var_t device_friendly_name;
-        prop_var_t device_desc;
+        audio::wstring_t id;
+        device->GetId(&id);
 
-        prop->GetValue(PKEY_Device_FriendlyName, &device_friendly_name.prop);
-        prop->GetValue(PKEY_DeviceInterface_FriendlyName, &adapter_friendly_name.prop);
-        prop->GetValue(PKEY_Device_DeviceDesc, &device_desc.prop);
+        sink.host = to_utf8(id.get());
+      }
 
-        auto adapter_name = no_null((LPWSTR) adapter_friendly_name.prop.pszVal);
-        BOOST_LOG(verbose)
-          << L"===== Device ====="sv << std::endl
-          << L"Device ID          : "sv << wstring.get() << std::endl
-          << L"Device name        : "sv << no_null((LPWSTR) device_friendly_name.prop.pszVal) << std::endl
-          << L"Adapter name       : "sv << adapter_name << std::endl
-          << L"Device description : "sv << no_null((LPWSTR) device_desc.prop.pszVal) << std::endl
-          << std::endl;
-
-        if (virtual_device_id.empty() && adapter_name == virtual_adapter_name) {
-          virtual_device_id = std::move(device_id);
-          virtual_device_found = true;
-          break;
-        }
-        else if (virtual_device_id == device_id) {
-          virtual_device_found = true;
-          break;
-        }
+      // Prepare to search for the device_id of the virtual audio sink device,
+      // this device can be either user-configured or
+      // the Steam Streaming Speakers we use by default.
+      match_fields_list_t match_list;
+      if (config::audio.virtual_sink.empty()) {
+        match_list = match_steam_speakers();
+      }
+      else {
+        match_list = match_all_fields(from_utf8(config::audio.virtual_sink));
       }
 
-      if (virtual_device_found) {
-        auto name_suffix = to_utf8(virtual_device_id);
+      // Search for the virtual audio sink device currently present in the system.
+      auto matched = find_device_id(match_list);
+      if (matched) {
+        // Prepare to fill virtual audio sink names with device_id.
+        auto device_id = to_utf8(matched->second);
+        // Also prepend format name (basically channel layout at the moment)
+        // because we don't want to extend the platform interface.
         sink.null = std::make_optional(sink_t::null_t {
-          "virtual-"s.append(formats[format_t::stereo - 1].name) + name_suffix,
-          "virtual-"s.append(formats[format_t::surr51 - 1].name) + name_suffix,
-          "virtual-"s.append(formats[format_t::surr71 - 1].name) + name_suffix,
+          "virtual-"s + formats[0].name + device_id,
+          "virtual-"s + formats[1].name + device_id,
+          "virtual-"s + formats[2].name + device_id,
         });
       }
-      else if (!virtual_device_id.empty()) {
-        BOOST_LOG(warning) << "Unable to find the specified virtual sink: "sv << virtual_device_id;
+      else if (!config::audio.virtual_sink.empty()) {
+        BOOST_LOG(warning) << "Couldn't find the specified virtual audio sink " << config::audio.virtual_sink;
       }
 
       return sink;
     }
 
     /**
-     * @brief Gets information encoded in the raw sink name
-     * @param sink The raw sink name
-     * @return A pair of type and the real sink name
+     * @brief Extract virtual audio sink information possibly encoded in the sink name.
+     * @param sink The sink name
+     * @return A pair of device_id and format reference if the sink name matches
+     *         our naming scheme for virtual audio sinks, `std::nullopt` otherwise.
      */
-    std::pair<format_t::type_e, std::string_view>
-    get_sink_info(const std::string &sink) {
-      std::string_view sv { sink.c_str(), sink.size() };
-
-      // sink format:
+    std::optional<std::pair<std::wstring, std::reference_wrapper<const format_t>>>
+    extract_virtual_sink_info(const std::string &sink) {
+      // Encoding format:
       // [virtual-(format name)]device_id
+      std::string current = sink;
       auto prefix = "virtual-"sv;
-      if (sv.find(prefix) == 0) {
-        sv = sv.substr(prefix.size(), sv.size() - prefix.size());
+      if (current.find(prefix) == 0) {
+        current = current.substr(prefix.size(), current.size() - prefix.size());
 
-        for (auto &format : formats) {
+        for (const auto &format : formats) {
           auto &name = format.name;
-          if (sv.find(name) == 0) {
-            return std::make_pair(format.type, sv.substr(name.size(), sv.size() - name.size()));
+          if (current.find(name) == 0) {
+            auto device_id = from_utf8(current.substr(name.size(), current.size() - name.size()));
+            return std::make_pair(device_id, std::reference_wrapper(format));
           }
         }
       }
 
-      return std::make_pair(format_t::none, sv);
+      return std::nullopt;
     }
 
     std::unique_ptr<mic_t>
@@ -721,8 +759,8 @@ namespace platf::audio {
       }
 
       // If this is a virtual sink, set a callback that will change the sink back if it's changed
-      auto sink_info = get_sink_info(assigned_sink);
-      if (sink_info.first != format_t::none) {
+      auto virtual_sink_info = extract_virtual_sink_info(assigned_sink);
+      if (virtual_sink_info) {
         mic->default_endpt_changed_cb = [this] {
           BOOST_LOG(info) << "Resetting sink to ["sv << assigned_sink << "] after default changed";
           set_sink(assigned_sink);
@@ -742,55 +780,56 @@ namespace platf::audio {
      */
     std::optional<std::wstring>
     set_format(const std::string &sink) {
-      auto sink_info = get_sink_info(sink);
-
-      // If the sink isn't a device name, we'll assume it's a device ID
-      auto wstring_device_id = find_device_id_by_name(sink).value_or(from_utf8(sink_info.second.data()));
-
-      if (sink_info.first == format_t::none) {
-        // wstring_device_id does not contain virtual-(format name)
-        // It's a simple deviceId, just pass it back
-        return std::make_optional(std::move(wstring_device_id));
-      }
-
-      wave_format_t wave_format;
-      auto status = policy->GetMixFormat(wstring_device_id.c_str(), &wave_format);
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Couldn't acquire Wave Format [0x"sv << util::hex(status).to_string_view() << ']';
-
+      if (sink.empty()) {
         return std::nullopt;
       }
 
-      set_wave_format(wave_format, formats[(int) sink_info.first - 1]);
-
-      WAVEFORMATEXTENSIBLE p {};
-      status = policy->SetDeviceFormat(wstring_device_id.c_str(), wave_format.get(), (WAVEFORMATEX *) &p);
+      auto virtual_sink_info = extract_virtual_sink_info(sink);
 
-      // Surround 5.1 might contain side-{left, right} instead of speaker in the back
-      // Try again with different speaker mask.
-      if (status == 0x88890008 && sink_info.first == format_t::surr51) {
-        set_wave_format(wave_format, surround_51_side_speakers);
-        status = policy->SetDeviceFormat(wstring_device_id.c_str(), wave_format.get(), (WAVEFORMATEX *) &p);
+      if (!virtual_sink_info) {
+        // Sink name does not begin with virtual-(format name), hence it's not a virtual sink
+        // and we don't want to change playback format of the corresponding device.
+        // Also need to perform matching, sink name is not necessarily device_id in this case.
+        auto matched = find_device_id(match_all_fields(from_utf8(sink)));
+        if (matched) {
+          return matched->second;
+        }
+        else {
+          BOOST_LOG(error) << "Couldn't find audio sink " << sink;
+          return std::nullopt;
+        }
       }
-      if (FAILED(status)) {
-        BOOST_LOG(error) << "Couldn't set Wave Format [0x"sv << util::hex(status).to_string_view() << ']';
 
-        return std::nullopt;
+      auto &device_id = virtual_sink_info->first;
+      auto &waveformats = virtual_sink_info->second.get().virtual_sink_waveformats;
+      for (const auto &waveformat : waveformats) {
+        // We're using completely undocumented and unlisted API,
+        // better not pass objects without copying them first.
+        auto device_id_copy = device_id;
+        auto waveformat_copy = waveformat;
+        auto waveformat_copy_pointer = reinterpret_cast<WAVEFORMATEX *>(&waveformat_copy);
+
+        WAVEFORMATEXTENSIBLE p {};
+        if (SUCCEEDED(policy->SetDeviceFormat(device_id_copy.c_str(), waveformat_copy_pointer, (WAVEFORMATEX *) &p))) {
+          BOOST_LOG(info) << "Changed virtual audio sink format to " << logging::bracket(waveformat_to_pretty_string(waveformat));
+          return device_id;
+        }
       }
 
-      return std::make_optional(std::move(wstring_device_id));
+      BOOST_LOG(error) << "Couldn't set virtual audio sink waveformat";
+      return std::nullopt;
     }
 
     int
     set_sink(const std::string &sink) override {
-      auto wstring_device_id = set_format(sink);
-      if (!wstring_device_id) {
+      auto device_id = set_format(sink);
+      if (!device_id) {
         return -1;
       }
 
       int failure {};
       for (int x = 0; x < (int) ERole_enum_count; ++x) {
-        auto status = policy->SetDefaultEndpoint(wstring_device_id->c_str(), (ERole) x);
+        auto status = policy->SetDefaultEndpoint(device_id->c_str(), (ERole) x);
         if (status) {
           // Depending on the format of the string, we could get either of these errors
           if (status == HRESULT_FROM_WIN32(ERROR_NOT_FOUND) || status == E_INVALIDARG) {
@@ -813,14 +852,41 @@ namespace platf::audio {
       return failure;
     }
 
+    enum class match_field_e {
+      device_id,  ///< Match device_id
+      device_friendly_name,  ///< Match endpoint friendly name
+      adapter_friendly_name,  ///< Match adapter friendly name
+      device_description,  ///< Match endpoint description
+    };
+
+    using match_fields_list_t = std::vector<std::pair<match_field_e, std::wstring>>;
+    using matched_field_t = std::pair<match_field_e, std::wstring>;
+
+    audio_control_t::match_fields_list_t
+    match_steam_speakers() {
+      return {
+        { match_field_e::adapter_friendly_name, L"Steam Streaming Speakers" }
+      };
+    }
+
+    audio_control_t::match_fields_list_t
+    match_all_fields(const std::wstring &name) {
+      return {
+        { match_field_e::device_id, name },  // {0.0.0.00000000}.{29dd7668-45b2-4846-882d-950f55bf7eb8}
+        { match_field_e::device_friendly_name, name },  // Digital Audio (S/PDIF) (High Definition Audio Device)
+        { match_field_e::device_description, name },  // Digital Audio (S/PDIF)
+        { match_field_e::adapter_friendly_name, name },  // High Definition Audio Device
+      };
+    }
+
     /**
-     * @brief Find the audio device ID given a user-specified name.
-     * @param name The name provided by the user.
-     * @return The matching device ID, or nothing if not found.
+     * @brief Search for currently present audio device_id using multiple match fields.
+     * @param match_list Pairs of match fields and values
+     * @return Optional pair of matched field and device_id
      */
-    std::optional<std::wstring>
-    find_device_id_by_name(const std::string &name) {
-      if (name.empty()) {
+    std::optional<matched_field_t>
+    find_device_id(const match_fields_list_t &match_list) {
+      if (match_list.empty()) {
         return std::nullopt;
       }
 
@@ -828,25 +894,20 @@ namespace platf::audio {
       auto status = device_enum->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &collection);
       if (FAILED(status)) {
         BOOST_LOG(error) << "Couldn't enumerate: [0x"sv << util::hex(status).to_string_view() << ']';
-
         return std::nullopt;
       }
 
-      UINT count;
+      UINT count = 0;
       collection->GetCount(&count);
 
-      auto wstring_name = from_utf8(name.data());
-
+      std::vector<std::wstring> matched(match_list.size());
       for (auto x = 0; x < count; ++x) {
         audio::device_t device;
         collection->Item(x, &device);
 
-        if (!validate_device(device)) {
-          continue;
-        }
-
         audio::wstring_t wstring_id;
         device->GetId(&wstring_id);
+        std::wstring device_id = wstring_id.get();
 
         audio::prop_t prop;
         device->OpenPropertyStore(STGM_READ, &prop);
@@ -859,15 +920,36 @@ namespace platf::audio {
         prop->GetValue(PKEY_DeviceInterface_FriendlyName, &adapter_friendly_name.prop);
         prop->GetValue(PKEY_Device_DeviceDesc, &device_desc.prop);
 
-        auto adapter_name = no_null((LPWSTR) adapter_friendly_name.prop.pszVal);
-        auto device_name = no_null((LPWSTR) device_friendly_name.prop.pszVal);
-        auto device_description = no_null((LPWSTR) device_desc.prop.pszVal);
+        for (size_t i = 0; i < match_list.size(); i++) {
+          if (matched[i].empty()) {
+            const wchar_t *match_value = nullptr;
+            switch (match_list[i].first) {
+              case match_field_e::device_id:
+                match_value = device_id.c_str();
+                break;
+
+              case match_field_e::device_friendly_name:
+                match_value = device_friendly_name.prop.pwszVal;
+                break;
+
+              case match_field_e::adapter_friendly_name:
+                match_value = adapter_friendly_name.prop.pwszVal;
+                break;
+
+              case match_field_e::device_description:
+                match_value = device_desc.prop.pwszVal;
+                break;
+            }
+            if (match_value && std::wcscmp(match_value, match_list[i].second.c_str()) == 0) {
+              matched[i] = device_id;
+            }
+          }
+        }
+      }
 
-        // Match the user-specified name against any of the user-visible strings
-        if (std::wcscmp(wstring_name.c_str(), adapter_name) == 0 ||
-            std::wcscmp(wstring_name.c_str(), device_name) == 0 ||
-            std::wcscmp(wstring_name.c_str(), device_description) == 0) {
-          return std::make_optional(std::wstring { wstring_id.get() });
+      for (size_t i = 0; i < match_list.size(); i++) {
+        if (!matched[i].empty()) {
+          return matched_field_t(match_list[i].first, matched[i]);
         }
       }
 
@@ -879,10 +961,11 @@ namespace platf::audio {
      */
     void
     reset_default_device() {
-      auto steam_device_id = find_device_id_by_name("Steam Streaming Speakers"s);
-      if (!steam_device_id) {
+      auto matched_steam = find_device_id(match_steam_speakers());
+      if (!matched_steam) {
         return;
       }
+      auto steam_device_id = matched_steam->second;
 
       {
         // Get the current default audio device (if present)
@@ -895,13 +978,13 @@ namespace platf::audio {
         current_default_dev->GetId(&current_default_id);
 
         // If Steam Streaming Speakers are already not default, we're done.
-        if (*steam_device_id != current_default_id.get()) {
+        if (steam_device_id != current_default_id.get()) {
           return;
         }
       }
 
       // Disable the Steam Streaming Speakers temporarily to allow the OS to pick a new default.
-      auto hr = policy->SetEndpointVisibility(steam_device_id->c_str(), FALSE);
+      auto hr = policy->SetEndpointVisibility(steam_device_id.c_str(), FALSE);
       if (FAILED(hr)) {
         BOOST_LOG(warning) << "Failed to disable Steam audio device: "sv << util::hex(hr).to_string_view();
         return;
@@ -911,7 +994,7 @@ namespace platf::audio {
       auto new_default_dev = default_device(device_enum);
 
       // Enable the Steam Streaming Speakers again
-      hr = policy->SetEndpointVisibility(steam_device_id->c_str(), TRUE);
+      hr = policy->SetEndpointVisibility(steam_device_id.c_str(), TRUE);
       if (FAILED(hr)) {
         BOOST_LOG(warning) << "Failed to enable Steam audio device: "sv << util::hex(hr).to_string_view();
         return;
@@ -1063,7 +1146,7 @@ namespace platf {
 
     // Install Steam Streaming Speakers if needed. We do this during audio_control() to ensure
     // the sink information returned includes the new Steam Streaming Speakers device.
-    if (config::audio.install_steam_drivers && !control->find_device_id_by_name("Steam Streaming Speakers"s)) {
+    if (config::audio.install_steam_drivers && !control->find_device_id(control->match_steam_speakers())) {
       // This is best effort. Don't fail if it doesn't work.
       control->install_steam_audio_drivers();
     }