From 7be0f49ad10b0ccfb15140697547762f9fd8f448 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Fri, 26 Jul 2024 04:01:43 +0300 Subject: [PATCH] Migrate audio pipeline to float from 16-bit integer (#2873) Co-authored-by: Cameron Gutman --- src/audio.cpp | 6 +- src/logging.cpp | 21 + src/logging.h | 32 ++ src/platform/common.h | 2 +- src/platform/linux/audio.cpp | 11 +- src/platform/macos/av_audio.h | 2 +- src/platform/macos/av_audio.m | 4 +- src/platform/macos/microphone.mm | 10 +- src/platform/windows/audio.cpp | 641 +++++++++++++++++-------------- 9 files changed, 433 insertions(+), 296 deletions(-) diff --git a/src/audio.cpp b/src/audio.cpp index ac1947fec74..b24ae61350f 100644 --- a/src/audio.cpp +++ b/src/audio.cpp @@ -18,7 +18,7 @@ namespace audio { using namespace std::literals; using opus_t = util::safe_ptr; - using sample_queue_t = std::shared_ptr>>; + using sample_queue_t = std::shared_ptr>>; struct audio_ctx_t { // We want to change the sink for the first stream only @@ -128,7 +128,7 @@ namespace audio { while (auto sample = samples->pop()) { buffer_t packet { 1400 }; - int bytes = opus_multistream_encode(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size()); + int bytes = opus_multistream_encode_float(opus.get(), sample->data(), frame_size, std::begin(packet), packet.size()); if (bytes < 0) { BOOST_LOG(error) << "Couldn't encode audio: "sv << opus_strerror(bytes); packets->stop(); @@ -228,7 +228,7 @@ namespace audio { int samples_per_frame = frame_size * stream.channelCount; while (!shutdown_event->peek()) { - std::vector sample_buffer; + std::vector sample_buffer; sample_buffer.resize(samples_per_frame); auto status = mic->sample(sample_buffer); diff --git a/src/logging.cpp b/src/logging.cpp index c96ee6a961a..82ca850c80a 100644 --- a/src/logging.cpp +++ b/src/logging.cpp @@ -169,4 +169,25 @@ namespace logging { << " -p | Enable/Disable UPnP"sv << std::endl << std::endl; } + + std::string + bracket(const std::string &input) { + return bracket(std::string_view(input)); + } + + std::string + bracket(const std::string_view &input) { + return "["s + std::string(input) + "]"s; + } + + std::wstring + bracket(const std::wstring &input) { + return bracket(std::wstring_view(input)); + } + + std::wstring + bracket(const std::wstring_view &input) { + return L"["s + std::wstring(input) + L"]"s; + } + } // namespace logging diff --git a/src/logging.h b/src/logging.h index 62600ff424c..6fe4645fc92 100644 --- a/src/logging.h +++ b/src/logging.h @@ -204,4 +204,36 @@ namespace logging { min_max_avg_periodic_logger logger; }; + /** + * @brief Enclose string in square brackets. + * @param input Input string. + * @return Enclosed string. + */ + std::string + bracket(const std::string &input); + + /** + * @brief Enclose string in square brackets. + * @param input Input string. + * @return Enclosed string. + */ + std::string + bracket(const std::string_view &input); + + /** + * @brief Enclose string in square brackets. + * @param input Input string. + * @return Enclosed string. + */ + std::wstring + bracket(const std::wstring &input); + + /** + * @brief Enclose string in square brackets. + * @param input Input string. + * @return Enclosed string. + */ + std::wstring + bracket(const std::wstring_view &input); + } // namespace logging diff --git a/src/platform/common.h b/src/platform/common.h index 755e604aa0c..24cd658b04d 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -525,7 +525,7 @@ namespace platf { class mic_t { public: virtual capture_e - sample(std::vector &frame_buffer) = 0; + sample(std::vector &frame_buffer) = 0; virtual ~mic_t() = default; }; diff --git a/src/platform/linux/audio.cpp b/src/platform/linux/audio.cpp index f742accbeec..32aa36a61b5 100644 --- a/src/platform/linux/audio.cpp +++ b/src/platform/linux/audio.cpp @@ -36,7 +36,7 @@ namespace platf { to_string(const char *name, const std::uint8_t *mapping, int channels) { std::stringstream ss; - ss << "rate=48000 sink_name="sv << name << " format=s16le channels="sv << channels << " channel_map="sv; + ss << "rate=48000 sink_name="sv << name << " format=float channels="sv << channels << " channel_map="sv; std::for_each_n(mapping, channels - 1, [&ss](std::uint8_t pos) { ss << pa_channel_position_to_string(position_mapping[pos]) << ','; }); @@ -54,12 +54,12 @@ namespace platf { util::safe_ptr mic; capture_e - sample(std::vector &sample_buf) override { + sample(std::vector &sample_buf) override { auto sample_size = sample_buf.size(); auto buf = sample_buf.data(); int status; - if (pa_simple_read(mic.get(), buf, sample_size * 2, &status)) { + if (pa_simple_read(mic.get(), buf, sample_size * sizeof(float), &status)) { BOOST_LOG(error) << "pa_simple_read() failed: "sv << pa_strerror(status); return capture_e::error; @@ -73,7 +73,7 @@ namespace platf { microphone(const std::uint8_t *mapping, int channels, std::uint32_t sample_rate, std::uint32_t frame_size, std::string source_name) { auto mic = std::make_unique(); - pa_sample_spec ss { PA_SAMPLE_S16LE, sample_rate, (std::uint8_t) channels }; + pa_sample_spec ss { PA_SAMPLE_FLOAT32, sample_rate, (std::uint8_t) channels }; pa_channel_map pa_map; pa_map.channels = channels; @@ -82,7 +82,8 @@ namespace platf { }); pa_buffer_attr pa_attr = {}; - pa_attr.maxlength = frame_size * 8; + pa_attr.fragsize = frame_size * channels * sizeof(float); + pa_attr.maxlength = pa_attr.fragsize * 2; int status; diff --git a/src/platform/macos/av_audio.h b/src/platform/macos/av_audio.h index e3c169eb4be..bf5b13b3d29 100644 --- a/src/platform/macos/av_audio.h +++ b/src/platform/macos/av_audio.h @@ -8,7 +8,7 @@ #include "third-party/TPCircularBuffer/TPCircularBuffer.h" -#define kBufferLength 2048 +#define kBufferLength 4096 @interface AVAudio: NSObject { @public diff --git a/src/platform/macos/av_audio.m b/src/platform/macos/av_audio.m index 7958abce2fa..f0e631ef311 100644 --- a/src/platform/macos/av_audio.m +++ b/src/platform/macos/av_audio.m @@ -87,8 +87,8 @@ - (int)setupMicrophone:(AVCaptureDevice *)device sampleRate:(UInt32)sampleRate f (NSString *) AVFormatIDKey: [NSNumber numberWithUnsignedInt:kAudioFormatLinearPCM], (NSString *) AVSampleRateKey: [NSNumber numberWithUnsignedInt:sampleRate], (NSString *) AVNumberOfChannelsKey: [NSNumber numberWithUnsignedInt:channels], - (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:16], - (NSString *) AVLinearPCMIsFloatKey: @NO, + (NSString *) AVLinearPCMBitDepthKey: [NSNumber numberWithUnsignedInt:32], + (NSString *) AVLinearPCMIsFloatKey: @YES, (NSString *) AVLinearPCMIsNonInterleaved: @NO }]; diff --git a/src/platform/macos/microphone.mm b/src/platform/macos/microphone.mm index f82db075b13..1e3a4cd65ed 100644 --- a/src/platform/macos/microphone.mm +++ b/src/platform/macos/microphone.mm @@ -19,23 +19,23 @@ } capture_e - sample(std::vector &sample_in) override { + sample(std::vector &sample_in) override { auto sample_size = sample_in.size(); uint32_t length = 0; void *byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length); - while (length < sample_size * sizeof(std::int16_t)) { + while (length < sample_size * sizeof(float)) { [av_audio_capture.samplesArrivedSignal wait]; byteSampleBuffer = TPCircularBufferTail(&av_audio_capture->audioSampleBuffer, &length); } - const int16_t *sampleBuffer = (int16_t *) byteSampleBuffer; - std::vector vectorBuffer(sampleBuffer, sampleBuffer + sample_size); + const float *sampleBuffer = (float *) byteSampleBuffer; + std::vector vectorBuffer(sampleBuffer, sampleBuffer + sample_size); std::copy_n(std::begin(vectorBuffer), sample_size, std::begin(sample_in)); - TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(std::int16_t)); + TPCircularBufferConsume(&av_audio_capture->audioSampleBuffer, sample_size * sizeof(float)); return capture_e::ok; } diff --git a/src/platform/windows/audio.cpp b/src/platform/windows/audio.cpp index bdcd66aba87..bc208128bc7 100644 --- a/src/platform/windows/audio.cpp +++ b/src/platform/windows/audio.cpp @@ -36,12 +36,168 @@ DEFINE_PROPERTYKEY(PKEY_DeviceInterface_FriendlyName, 0x026e516e, 0xb814, 0x414b #warning No known Steam audio driver for this architecture #endif -using namespace std::literals; -namespace platf::audio { - constexpr auto SAMPLE_RATE = 48000; +namespace { + constexpr auto SAMPLE_RATE = 48000; constexpr auto STEAM_AUDIO_DRIVER_PATH = L"%CommonProgramFiles(x86)%\\Steam\\drivers\\Windows10\\" STEAM_DRIVER_SUBDIR L"\\SteamStreamingSpeakers.inf"; + constexpr auto waveformat_mask_stereo = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT; + + constexpr auto waveformat_mask_surround51_with_backspeakers = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | + SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY | + SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT; + + constexpr auto waveformat_mask_surround51_with_sidespeakers = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | + SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY | + SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT; + + constexpr auto waveformat_mask_surround71 = SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | + SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY | + SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT | + SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT; + + enum class sample_format_e { + f32, + s32, + s24in32, + s24, + s16, + _size, + }; + + constexpr WAVEFORMATEXTENSIBLE + create_waveformat(sample_format_e sample_format, WORD channel_count, DWORD channel_mask) { + WAVEFORMATEXTENSIBLE waveformat = {}; + + switch (sample_format) { + default: + case sample_format_e::f32: + waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_IEEE_FLOAT; + waveformat.Format.wBitsPerSample = 32; + waveformat.Samples.wValidBitsPerSample = 32; + break; + + case sample_format_e::s32: + waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + waveformat.Format.wBitsPerSample = 32; + waveformat.Samples.wValidBitsPerSample = 32; + break; + + case sample_format_e::s24in32: + waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + waveformat.Format.wBitsPerSample = 32; + waveformat.Samples.wValidBitsPerSample = 24; + break; + + case sample_format_e::s24: + waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + waveformat.Format.wBitsPerSample = 24; + waveformat.Samples.wValidBitsPerSample = 24; + break; + + case sample_format_e::s16: + waveformat.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; + waveformat.Format.wBitsPerSample = 16; + waveformat.Samples.wValidBitsPerSample = 16; + break; + } + + static_assert((int) sample_format_e::_size == 5, "Unrecognized sample_format_e"); + + waveformat.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; + waveformat.Format.nChannels = channel_count; + waveformat.Format.nSamplesPerSec = SAMPLE_RATE; + + waveformat.Format.nBlockAlign = waveformat.Format.nChannels * waveformat.Format.wBitsPerSample / 8; + waveformat.Format.nAvgBytesPerSec = waveformat.Format.nSamplesPerSec * waveformat.Format.nBlockAlign; + waveformat.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); + + waveformat.dwChannelMask = channel_mask; + + return waveformat; + } + + using virtual_sink_waveformats_t = std::vector; + + template + virtual_sink_waveformats_t + create_virtual_sink_waveformats() { + if constexpr (channel_count == 2) { + auto channel_mask = waveformat_mask_stereo; + return { + create_waveformat(sample_format_e::f32, channel_count, channel_mask), + create_waveformat(sample_format_e::s32, channel_count, channel_mask), + create_waveformat(sample_format_e::s24in32, channel_count, channel_mask), + create_waveformat(sample_format_e::s24, channel_count, channel_mask), + create_waveformat(sample_format_e::s16, channel_count, channel_mask), + }; + } + else if (channel_count == 6) { + auto channel_mask1 = waveformat_mask_surround51_with_backspeakers; + auto channel_mask2 = waveformat_mask_surround51_with_sidespeakers; + return { + create_waveformat(sample_format_e::f32, channel_count, channel_mask1), + create_waveformat(sample_format_e::f32, channel_count, channel_mask2), + create_waveformat(sample_format_e::s32, channel_count, channel_mask1), + create_waveformat(sample_format_e::s32, channel_count, channel_mask2), + create_waveformat(sample_format_e::s24in32, channel_count, channel_mask1), + create_waveformat(sample_format_e::s24in32, channel_count, channel_mask2), + create_waveformat(sample_format_e::s24, channel_count, channel_mask1), + create_waveformat(sample_format_e::s24, channel_count, channel_mask2), + create_waveformat(sample_format_e::s16, channel_count, channel_mask1), + create_waveformat(sample_format_e::s16, channel_count, channel_mask2), + }; + } + else if (channel_count == 8) { + auto channel_mask = waveformat_mask_surround71; + return { + create_waveformat(sample_format_e::f32, channel_count, channel_mask), + create_waveformat(sample_format_e::s32, channel_count, channel_mask), + create_waveformat(sample_format_e::s24in32, channel_count, channel_mask), + create_waveformat(sample_format_e::s24, channel_count, channel_mask), + create_waveformat(sample_format_e::s16, channel_count, channel_mask), + }; + } + } + + std::string + waveformat_to_pretty_string(const WAVEFORMATEXTENSIBLE &waveformat) { + std::string result = waveformat.SubFormat == KSDATAFORMAT_SUBTYPE_IEEE_FLOAT ? "F" : + waveformat.SubFormat == KSDATAFORMAT_SUBTYPE_PCM ? "S" : + "UNKNOWN"; + + result += std::to_string(waveformat.Samples.wValidBitsPerSample) + " " + + std::to_string(waveformat.Format.nSamplesPerSec) + " "; + + switch (waveformat.dwChannelMask) { + case (waveformat_mask_stereo): + result += "2.0"; + break; + + case (waveformat_mask_surround51_with_backspeakers): + result += "5.1"; + break; + + case (waveformat_mask_surround51_with_sidespeakers): + result += "5.1 (sidespeakers)"; + break; + + case (waveformat_mask_surround71): + result += "7.1"; + break; + + default: + result += std::to_string(waveformat.Format.nChannels) + " channels (unrecognized)"; + break; + } + + return result; + } + +} // namespace + +using namespace std::literals; +namespace platf::audio { template void Release(T *p) { @@ -90,109 +246,33 @@ namespace platf::audio { }; struct format_t { - enum type_e : int { - none, ///< No format - stereo, ///< Stereo - surr51, ///< Surround 5.1 - surr71, ///< Surround 7.1 - } type; - - std::string_view name; - int channels; - int channel_mask; - } formats[] { - { - format_t::stereo, - "Stereo"sv, + WORD channel_count; + std::string name; + int capture_waveformat_channel_mask; + virtual_sink_waveformats_t virtual_sink_waveformats; + }; + + const std::array formats = { + format_t { 2, - SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT, + "Stereo", + waveformat_mask_stereo, + create_virtual_sink_waveformats<2>(), }, - { - format_t::surr51, - "Surround 5.1"sv, + format_t { 6, - SPEAKER_FRONT_LEFT | - SPEAKER_FRONT_RIGHT | - SPEAKER_FRONT_CENTER | - SPEAKER_LOW_FREQUENCY | - SPEAKER_BACK_LEFT | - SPEAKER_BACK_RIGHT, + "Surround 5.1", + waveformat_mask_surround51_with_backspeakers, + create_virtual_sink_waveformats<6>(), }, - { - format_t::surr71, - "Surround 7.1"sv, + format_t { 8, - SPEAKER_FRONT_LEFT | - SPEAKER_FRONT_RIGHT | - SPEAKER_FRONT_CENTER | - SPEAKER_LOW_FREQUENCY | - SPEAKER_BACK_LEFT | - SPEAKER_BACK_RIGHT | - SPEAKER_SIDE_LEFT | - SPEAKER_SIDE_RIGHT, + "Surround 7.1", + waveformat_mask_surround71, + create_virtual_sink_waveformats<8>(), }, }; - static format_t surround_51_side_speakers { - format_t::surr51, - "Surround 5.1"sv, - 6, - SPEAKER_FRONT_LEFT | - SPEAKER_FRONT_RIGHT | - SPEAKER_FRONT_CENTER | - SPEAKER_LOW_FREQUENCY | - SPEAKER_SIDE_LEFT | - SPEAKER_SIDE_RIGHT, - }; - - WAVEFORMATEXTENSIBLE - create_wave_format(const format_t &format) { - WAVEFORMATEXTENSIBLE wave_format; - - wave_format.Format.wFormatTag = WAVE_FORMAT_EXTENSIBLE; - wave_format.Format.nChannels = format.channels; - wave_format.Format.nSamplesPerSec = SAMPLE_RATE; - wave_format.Format.wBitsPerSample = 16; - wave_format.Format.nBlockAlign = wave_format.Format.nChannels * wave_format.Format.wBitsPerSample / 8; - wave_format.Format.nAvgBytesPerSec = wave_format.Format.nSamplesPerSec * wave_format.Format.nBlockAlign; - wave_format.Format.cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); - - wave_format.Samples.wValidBitsPerSample = 16; - wave_format.dwChannelMask = format.channel_mask; - wave_format.SubFormat = KSDATAFORMAT_SUBTYPE_PCM; - - return wave_format; - } - - int - set_wave_format(audio::wave_format_t &wave_format, const format_t &format) { - wave_format->nSamplesPerSec = SAMPLE_RATE; - wave_format->wBitsPerSample = 16; - - switch (wave_format->wFormatTag) { - case WAVE_FORMAT_PCM: - break; - case WAVE_FORMAT_IEEE_FLOAT: - break; - case WAVE_FORMAT_EXTENSIBLE: { - auto wave_ex = (PWAVEFORMATEXTENSIBLE) wave_format.get(); - wave_ex->Samples.wValidBitsPerSample = 16; - wave_ex->dwChannelMask = format.channel_mask; - wave_ex->SubFormat = KSDATAFORMAT_SUBTYPE_PCM; - break; - } - default: - BOOST_LOG(error) << "Unsupported Wave Format: [0x"sv << util::hex(wave_format->wFormatTag).to_string_view() << ']'; - return -1; - }; - - wave_format->nChannels = format.channels; - wave_format->nBlockAlign = wave_format->nChannels * wave_format->wBitsPerSample / 8; - wave_format->nAvgBytesPerSec = wave_format->nSamplesPerSec * wave_format->nBlockAlign; - - return 0; - } - audio_client_t make_audio_client(device_t &device, const format_t &format) { audio_client_t audio_client; @@ -208,45 +288,42 @@ namespace platf::audio { return nullptr; } - WAVEFORMATEXTENSIBLE wave_format = create_wave_format(format); + WAVEFORMATEXTENSIBLE capture_waveformat = + create_waveformat(sample_format_e::f32, format.channel_count, format.capture_waveformat_channel_mask); + + { + wave_format_t mixer_waveformat; + status = audio_client->GetMixFormat(&mixer_waveformat); + if (FAILED(status)) { + BOOST_LOG(error) << "Couldn't get mix format for audio device: [0x"sv << util::hex(status).to_string_view() << ']'; + return nullptr; + } + + // Prefer the native channel layout of captured audio device when channel counts match + if (mixer_waveformat->nChannels == format.channel_count && + mixer_waveformat->wFormatTag == WAVE_FORMAT_EXTENSIBLE && + mixer_waveformat->cbSize >= 22) { + auto waveformatext_pointer = reinterpret_cast(mixer_waveformat.get()); + capture_waveformat.dwChannelMask = waveformatext_pointer->dwChannelMask; + } + } status = audio_client->Initialize( AUDCLNT_SHAREMODE_SHARED, AUDCLNT_STREAMFLAGS_LOOPBACK | AUDCLNT_STREAMFLAGS_EVENTCALLBACK | AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM | AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY, // Enable automatic resampling to 48 KHz 0, 0, - (LPWAVEFORMATEX) &wave_format, + (LPWAVEFORMATEX) &capture_waveformat, nullptr); if (status) { - BOOST_LOG(debug) << "Couldn't initialize audio client for ["sv << format.name << "]: [0x"sv << util::hex(status).to_string_view() << ']'; + BOOST_LOG(error) << "Couldn't initialize audio client for ["sv << format.name << "]: [0x"sv << util::hex(status).to_string_view() << ']'; return nullptr; } - return audio_client; - } - - const wchar_t * - no_null(const wchar_t *str) { - return str ? str : L"Unknown"; - } - - bool - validate_device(device_t &device) { - bool valid = false; - - // Check for any valid format - for (const auto &format : formats) { - auto audio_client = make_audio_client(device, format); - - BOOST_LOG(debug) << format.name << ": "sv << (!audio_client ? "unsupported"sv : "supported"sv); - - if (audio_client) { - valid = true; - } - } + BOOST_LOG(info) << "Audio capture format is " << logging::bracket(waveformat_to_pretty_string(capture_waveformat)); - return valid; + return audio_client; } device_t @@ -341,7 +418,7 @@ namespace platf::audio { class mic_wasapi_t: public mic_t { public: capture_e - sample(std::vector &sample_out) override { + sample(std::vector &sample_out) override { auto sample_size = sample_out.size(); // Refill the sample buffer if needed @@ -398,9 +475,10 @@ namespace platf::audio { return -1; } - for (auto &format : formats) { - if (format.channels != channels_out) { - BOOST_LOG(debug) << "Skipping audio format ["sv << format.name << "] with channel count ["sv << format.channels << " != "sv << channels_out << ']'; + for (const auto &format : formats) { + if (format.channel_count != channels_out) { + BOOST_LOG(debug) << "Skipping audio format ["sv << format.name << "] with channel count ["sv + << format.channel_count << " != "sv << channels_out << ']'; continue; } @@ -432,7 +510,7 @@ namespace platf::audio { } // *2 --> needs to fit double - sample_buf = util::buffer_t { std::max(frames, frame_size) * 2 * channels_out }; + sample_buf = util::buffer_t { std::max(frames, frame_size) * 2 * channels_out }; sample_buf_pos = std::begin(sample_buf); status = audio_client->GetService(IID_IAudioCaptureClient, (void **) &audio_capture); @@ -489,7 +567,7 @@ namespace platf::audio { // Total number of samples struct sample_aligned_t { std::uint32_t uninitialized; - std::int16_t *samples; + float *samples; } sample_aligned; // number of samples / number of channels @@ -588,8 +666,8 @@ namespace platf::audio { REFERENCE_TIME default_latency_ms; - util::buffer_t sample_buf; - std::int16_t *sample_buf_pos; + util::buffer_t sample_buf; + float *sample_buf_pos; int channels; HANDLE mmcss_task_handle = NULL; @@ -599,117 +677,77 @@ namespace platf::audio { public: std::optional sink_info() override { - auto virtual_adapter_name = L"Steam Streaming Speakers"sv; - sink_t sink; - auto device = default_device(device_enum); - if (!device) { - return std::nullopt; - } - - audio::wstring_t wstring; - device->GetId(&wstring); - - sink.host = to_utf8(wstring.get()); - - collection_t collection; - auto status = device_enum->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &collection); - if (FAILED(status)) { - BOOST_LOG(error) << "Couldn't enumerate: [0x"sv << util::hex(status).to_string_view() << ']'; - - return std::nullopt; - } - - UINT count; - collection->GetCount(&count); - - // If the sink isn't a device name, we'll assume it's a device ID - auto virtual_device_id = find_device_id_by_name(config::audio.virtual_sink).value_or(from_utf8(config::audio.virtual_sink)); - auto virtual_device_found = false; - - for (auto x = 0; x < count; ++x) { - audio::device_t device; - collection->Item(x, &device); - - if (!validate_device(device)) { - continue; + // Fill host sink name with the device_id of the current default audio device. + { + auto device = default_device(device_enum); + if (!device) { + return std::nullopt; } - audio::wstring_t wstring; - device->GetId(&wstring); - std::wstring device_id { wstring.get() }; - - audio::prop_t prop; - device->OpenPropertyStore(STGM_READ, &prop); - - prop_var_t adapter_friendly_name; - prop_var_t device_friendly_name; - prop_var_t device_desc; + audio::wstring_t id; + device->GetId(&id); - prop->GetValue(PKEY_Device_FriendlyName, &device_friendly_name.prop); - prop->GetValue(PKEY_DeviceInterface_FriendlyName, &adapter_friendly_name.prop); - prop->GetValue(PKEY_Device_DeviceDesc, &device_desc.prop); + sink.host = to_utf8(id.get()); + } - auto adapter_name = no_null((LPWSTR) adapter_friendly_name.prop.pszVal); - BOOST_LOG(verbose) - << L"===== Device ====="sv << std::endl - << L"Device ID : "sv << wstring.get() << std::endl - << L"Device name : "sv << no_null((LPWSTR) device_friendly_name.prop.pszVal) << std::endl - << L"Adapter name : "sv << adapter_name << std::endl - << L"Device description : "sv << no_null((LPWSTR) device_desc.prop.pszVal) << std::endl - << std::endl; - - if (virtual_device_id.empty() && adapter_name == virtual_adapter_name) { - virtual_device_id = std::move(device_id); - virtual_device_found = true; - break; - } - else if (virtual_device_id == device_id) { - virtual_device_found = true; - break; - } + // Prepare to search for the device_id of the virtual audio sink device, + // this device can be either user-configured or + // the Steam Streaming Speakers we use by default. + match_fields_list_t match_list; + if (config::audio.virtual_sink.empty()) { + match_list = match_steam_speakers(); + } + else { + match_list = match_all_fields(from_utf8(config::audio.virtual_sink)); } - if (virtual_device_found) { - auto name_suffix = to_utf8(virtual_device_id); + // Search for the virtual audio sink device currently present in the system. + auto matched = find_device_id(match_list); + if (matched) { + // Prepare to fill virtual audio sink names with device_id. + auto device_id = to_utf8(matched->second); + // Also prepend format name (basically channel layout at the moment) + // because we don't want to extend the platform interface. sink.null = std::make_optional(sink_t::null_t { - "virtual-"s.append(formats[format_t::stereo - 1].name) + name_suffix, - "virtual-"s.append(formats[format_t::surr51 - 1].name) + name_suffix, - "virtual-"s.append(formats[format_t::surr71 - 1].name) + name_suffix, + "virtual-"s + formats[0].name + device_id, + "virtual-"s + formats[1].name + device_id, + "virtual-"s + formats[2].name + device_id, }); } - else if (!virtual_device_id.empty()) { - BOOST_LOG(warning) << "Unable to find the specified virtual sink: "sv << virtual_device_id; + else if (!config::audio.virtual_sink.empty()) { + BOOST_LOG(warning) << "Couldn't find the specified virtual audio sink " << config::audio.virtual_sink; } return sink; } /** - * @brief Gets information encoded in the raw sink name - * @param sink The raw sink name - * @return A pair of type and the real sink name + * @brief Extract virtual audio sink information possibly encoded in the sink name. + * @param sink The sink name + * @return A pair of device_id and format reference if the sink name matches + * our naming scheme for virtual audio sinks, `std::nullopt` otherwise. */ - std::pair - get_sink_info(const std::string &sink) { - std::string_view sv { sink.c_str(), sink.size() }; - - // sink format: + std::optional>> + extract_virtual_sink_info(const std::string &sink) { + // Encoding format: // [virtual-(format name)]device_id + std::string current = sink; auto prefix = "virtual-"sv; - if (sv.find(prefix) == 0) { - sv = sv.substr(prefix.size(), sv.size() - prefix.size()); + if (current.find(prefix) == 0) { + current = current.substr(prefix.size(), current.size() - prefix.size()); - for (auto &format : formats) { + for (const auto &format : formats) { auto &name = format.name; - if (sv.find(name) == 0) { - return std::make_pair(format.type, sv.substr(name.size(), sv.size() - name.size())); + if (current.find(name) == 0) { + auto device_id = from_utf8(current.substr(name.size(), current.size() - name.size())); + return std::make_pair(device_id, std::reference_wrapper(format)); } } } - return std::make_pair(format_t::none, sv); + return std::nullopt; } std::unique_ptr @@ -721,8 +759,8 @@ namespace platf::audio { } // If this is a virtual sink, set a callback that will change the sink back if it's changed - auto sink_info = get_sink_info(assigned_sink); - if (sink_info.first != format_t::none) { + auto virtual_sink_info = extract_virtual_sink_info(assigned_sink); + if (virtual_sink_info) { mic->default_endpt_changed_cb = [this] { BOOST_LOG(info) << "Resetting sink to ["sv << assigned_sink << "] after default changed"; set_sink(assigned_sink); @@ -742,55 +780,56 @@ namespace platf::audio { */ std::optional set_format(const std::string &sink) { - auto sink_info = get_sink_info(sink); - - // If the sink isn't a device name, we'll assume it's a device ID - auto wstring_device_id = find_device_id_by_name(sink).value_or(from_utf8(sink_info.second.data())); - - if (sink_info.first == format_t::none) { - // wstring_device_id does not contain virtual-(format name) - // It's a simple deviceId, just pass it back - return std::make_optional(std::move(wstring_device_id)); - } - - wave_format_t wave_format; - auto status = policy->GetMixFormat(wstring_device_id.c_str(), &wave_format); - if (FAILED(status)) { - BOOST_LOG(error) << "Couldn't acquire Wave Format [0x"sv << util::hex(status).to_string_view() << ']'; - + if (sink.empty()) { return std::nullopt; } - set_wave_format(wave_format, formats[(int) sink_info.first - 1]); - - WAVEFORMATEXTENSIBLE p {}; - status = policy->SetDeviceFormat(wstring_device_id.c_str(), wave_format.get(), (WAVEFORMATEX *) &p); + auto virtual_sink_info = extract_virtual_sink_info(sink); - // Surround 5.1 might contain side-{left, right} instead of speaker in the back - // Try again with different speaker mask. - if (status == 0x88890008 && sink_info.first == format_t::surr51) { - set_wave_format(wave_format, surround_51_side_speakers); - status = policy->SetDeviceFormat(wstring_device_id.c_str(), wave_format.get(), (WAVEFORMATEX *) &p); + if (!virtual_sink_info) { + // Sink name does not begin with virtual-(format name), hence it's not a virtual sink + // and we don't want to change playback format of the corresponding device. + // Also need to perform matching, sink name is not necessarily device_id in this case. + auto matched = find_device_id(match_all_fields(from_utf8(sink))); + if (matched) { + return matched->second; + } + else { + BOOST_LOG(error) << "Couldn't find audio sink " << sink; + return std::nullopt; + } } - if (FAILED(status)) { - BOOST_LOG(error) << "Couldn't set Wave Format [0x"sv << util::hex(status).to_string_view() << ']'; - return std::nullopt; + auto &device_id = virtual_sink_info->first; + auto &waveformats = virtual_sink_info->second.get().virtual_sink_waveformats; + for (const auto &waveformat : waveformats) { + // We're using completely undocumented and unlisted API, + // better not pass objects without copying them first. + auto device_id_copy = device_id; + auto waveformat_copy = waveformat; + auto waveformat_copy_pointer = reinterpret_cast(&waveformat_copy); + + WAVEFORMATEXTENSIBLE p {}; + if (SUCCEEDED(policy->SetDeviceFormat(device_id_copy.c_str(), waveformat_copy_pointer, (WAVEFORMATEX *) &p))) { + BOOST_LOG(info) << "Changed virtual audio sink format to " << logging::bracket(waveformat_to_pretty_string(waveformat)); + return device_id; + } } - return std::make_optional(std::move(wstring_device_id)); + BOOST_LOG(error) << "Couldn't set virtual audio sink waveformat"; + return std::nullopt; } int set_sink(const std::string &sink) override { - auto wstring_device_id = set_format(sink); - if (!wstring_device_id) { + auto device_id = set_format(sink); + if (!device_id) { return -1; } int failure {}; for (int x = 0; x < (int) ERole_enum_count; ++x) { - auto status = policy->SetDefaultEndpoint(wstring_device_id->c_str(), (ERole) x); + auto status = policy->SetDefaultEndpoint(device_id->c_str(), (ERole) x); if (status) { // Depending on the format of the string, we could get either of these errors if (status == HRESULT_FROM_WIN32(ERROR_NOT_FOUND) || status == E_INVALIDARG) { @@ -813,14 +852,41 @@ namespace platf::audio { return failure; } + enum class match_field_e { + device_id, ///< Match device_id + device_friendly_name, ///< Match endpoint friendly name + adapter_friendly_name, ///< Match adapter friendly name + device_description, ///< Match endpoint description + }; + + using match_fields_list_t = std::vector>; + using matched_field_t = std::pair; + + audio_control_t::match_fields_list_t + match_steam_speakers() { + return { + { match_field_e::adapter_friendly_name, L"Steam Streaming Speakers" } + }; + } + + audio_control_t::match_fields_list_t + match_all_fields(const std::wstring &name) { + return { + { match_field_e::device_id, name }, // {0.0.0.00000000}.{29dd7668-45b2-4846-882d-950f55bf7eb8} + { match_field_e::device_friendly_name, name }, // Digital Audio (S/PDIF) (High Definition Audio Device) + { match_field_e::device_description, name }, // Digital Audio (S/PDIF) + { match_field_e::adapter_friendly_name, name }, // High Definition Audio Device + }; + } + /** - * @brief Find the audio device ID given a user-specified name. - * @param name The name provided by the user. - * @return The matching device ID, or nothing if not found. + * @brief Search for currently present audio device_id using multiple match fields. + * @param match_list Pairs of match fields and values + * @return Optional pair of matched field and device_id */ - std::optional - find_device_id_by_name(const std::string &name) { - if (name.empty()) { + std::optional + find_device_id(const match_fields_list_t &match_list) { + if (match_list.empty()) { return std::nullopt; } @@ -828,25 +894,20 @@ namespace platf::audio { auto status = device_enum->EnumAudioEndpoints(eRender, DEVICE_STATE_ACTIVE, &collection); if (FAILED(status)) { BOOST_LOG(error) << "Couldn't enumerate: [0x"sv << util::hex(status).to_string_view() << ']'; - return std::nullopt; } - UINT count; + UINT count = 0; collection->GetCount(&count); - auto wstring_name = from_utf8(name.data()); - + std::vector matched(match_list.size()); for (auto x = 0; x < count; ++x) { audio::device_t device; collection->Item(x, &device); - if (!validate_device(device)) { - continue; - } - audio::wstring_t wstring_id; device->GetId(&wstring_id); + std::wstring device_id = wstring_id.get(); audio::prop_t prop; device->OpenPropertyStore(STGM_READ, &prop); @@ -859,15 +920,36 @@ namespace platf::audio { prop->GetValue(PKEY_DeviceInterface_FriendlyName, &adapter_friendly_name.prop); prop->GetValue(PKEY_Device_DeviceDesc, &device_desc.prop); - auto adapter_name = no_null((LPWSTR) adapter_friendly_name.prop.pszVal); - auto device_name = no_null((LPWSTR) device_friendly_name.prop.pszVal); - auto device_description = no_null((LPWSTR) device_desc.prop.pszVal); + for (size_t i = 0; i < match_list.size(); i++) { + if (matched[i].empty()) { + const wchar_t *match_value = nullptr; + switch (match_list[i].first) { + case match_field_e::device_id: + match_value = device_id.c_str(); + break; + + case match_field_e::device_friendly_name: + match_value = device_friendly_name.prop.pwszVal; + break; + + case match_field_e::adapter_friendly_name: + match_value = adapter_friendly_name.prop.pwszVal; + break; + + case match_field_e::device_description: + match_value = device_desc.prop.pwszVal; + break; + } + if (match_value && std::wcscmp(match_value, match_list[i].second.c_str()) == 0) { + matched[i] = device_id; + } + } + } + } - // Match the user-specified name against any of the user-visible strings - if (std::wcscmp(wstring_name.c_str(), adapter_name) == 0 || - std::wcscmp(wstring_name.c_str(), device_name) == 0 || - std::wcscmp(wstring_name.c_str(), device_description) == 0) { - return std::make_optional(std::wstring { wstring_id.get() }); + for (size_t i = 0; i < match_list.size(); i++) { + if (!matched[i].empty()) { + return matched_field_t(match_list[i].first, matched[i]); } } @@ -879,10 +961,11 @@ namespace platf::audio { */ void reset_default_device() { - auto steam_device_id = find_device_id_by_name("Steam Streaming Speakers"s); - if (!steam_device_id) { + auto matched_steam = find_device_id(match_steam_speakers()); + if (!matched_steam) { return; } + auto steam_device_id = matched_steam->second; { // Get the current default audio device (if present) @@ -895,13 +978,13 @@ namespace platf::audio { current_default_dev->GetId(¤t_default_id); // If Steam Streaming Speakers are already not default, we're done. - if (*steam_device_id != current_default_id.get()) { + if (steam_device_id != current_default_id.get()) { return; } } // Disable the Steam Streaming Speakers temporarily to allow the OS to pick a new default. - auto hr = policy->SetEndpointVisibility(steam_device_id->c_str(), FALSE); + auto hr = policy->SetEndpointVisibility(steam_device_id.c_str(), FALSE); if (FAILED(hr)) { BOOST_LOG(warning) << "Failed to disable Steam audio device: "sv << util::hex(hr).to_string_view(); return; @@ -911,7 +994,7 @@ namespace platf::audio { auto new_default_dev = default_device(device_enum); // Enable the Steam Streaming Speakers again - hr = policy->SetEndpointVisibility(steam_device_id->c_str(), TRUE); + hr = policy->SetEndpointVisibility(steam_device_id.c_str(), TRUE); if (FAILED(hr)) { BOOST_LOG(warning) << "Failed to enable Steam audio device: "sv << util::hex(hr).to_string_view(); return; @@ -1063,7 +1146,7 @@ namespace platf { // Install Steam Streaming Speakers if needed. We do this during audio_control() to ensure // the sink information returned includes the new Steam Streaming Speakers device. - if (config::audio.install_steam_drivers && !control->find_device_id_by_name("Steam Streaming Speakers"s)) { + if (config::audio.install_steam_drivers && !control->find_device_id(control->match_steam_speakers())) { // This is best effort. Don't fail if it doesn't work. control->install_steam_audio_drivers(); }