Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: automatic selection for hybrid GPU and IDDSampleDriver users #3002

Merged
merged 18 commits into from
Sep 29, 2024
Merged
70 changes: 44 additions & 26 deletions src/platform/windows/display_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <initguid.h>
#include <thread>

#include <boost/algorithm/string/join.hpp>
#include <boost/process.hpp>

// We have to include boost/process.hpp before display.h due to WinSock.h,
Expand Down Expand Up @@ -351,24 +352,8 @@ namespace platf::dxgi {
return true;
}

// On hybrid graphics systems, Windows will change the order of GPUs reported by
// DXGI in accordance with the user's GPU preference. If the selected GPU is a
// render-only device with no displays, DXGI will add virtual outputs to
// that device to avoid confusing applications. While this works properly for most
// applications, it breaks the Desktop Duplication API because DXGI doesn't proxy
// the virtual DXGIOutput to the real GPU it is attached to. When trying to call
// DuplicateOutput() on one of these virtual outputs, it fails with DXGI_ERROR_UNSUPPORTED
// (even if you try sneaky stuff like passing the ID3D11Device for the iGPU and the
// virtual DXGIOutput from the dGPU). Because the GPU preference is once-per-process,
// we spawn a helper tool to probe for us before we set our own GPU preference.
bool
probe_for_gpu_preference(const std::string &display_name) {
// If we've already been through here, there's nothing to do this time.
static bool set_gpu_preference = false;
if (set_gpu_preference) {
return true;
}

validate_and_test_gpu_preference(const std::string &display_name, bool verify_frame_capture) {
std::string cmd = "tools\\ddprobe.exe";

// We start at 1 because 0 is automatic selection which can be overridden by
Expand All @@ -378,38 +363,71 @@ namespace platf::dxgi {
for (int i = 1; i < 5; i++) {
// Run the probe tool. It returns the status of DuplicateOutput().
//
// Arg format: [GPU preference] [Display name]
// Arg format: [GPU preference] [Display name] [--verify-frame-capture]
HRESULT result;
std::vector<std::string> args = { std::to_string(i), display_name };
try {
result = bp::system(cmd, std::to_string(i), display_name, bp::std_out > bp::null, bp::std_err > bp::null);
if (verify_frame_capture) {
args.emplace_back("--verify-frame-capture");
}
result = bp::system(cmd, bp::args(args), bp::std_out > bp::null, bp::std_err > bp::null);
}
catch (bp::process_error &e) {
BOOST_LOG(error) << "Failed to start ddprobe.exe: "sv << e.what();
return false;
}

BOOST_LOG(info) << "ddprobe.exe ["sv << i << "] ["sv << display_name << "] returned: 0x"sv << util::hex(result).to_string_view();
BOOST_LOG(info) << "ddprobe.exe " << boost::algorithm::join(args, " ") << " returned 0x"
<< util::hex(result).to_string_view();

// E_ACCESSDENIED can happen at the login screen. If we get this error,
// we know capture would have been supported, because DXGI_ERROR_UNSUPPORTED
// would have been raised first if it wasn't.
if (result == S_OK || result == E_ACCESSDENIED) {
// We found a working GPU preference, so set ourselves to use that.
if (set_gpu_preference_on_self(i)) {
set_gpu_preference = true;
return true;
}
else {
return false;
}
}
else {
// This configuration didn't work, so continue testing others
continue;
}
}

// If none of the manual options worked, leave the GPU preference alone
// If no valid configuration was found, return false
return false;
}

// On hybrid graphics systems, Windows will change the order of GPUs reported by
// DXGI in accordance with the user's GPU preference. If the selected GPU is a
// render-only device with no displays, DXGI will add virtual outputs to
// that device to avoid confusing applications. While this works properly for most
// applications, it breaks the Desktop Duplication API because DXGI doesn't proxy
// the virtual DXGIOutput to the real GPU it is attached to. When trying to call
// DuplicateOutput() on one of these virtual outputs, it fails with DXGI_ERROR_UNSUPPORTED
// (even if you try sneaky stuff like passing the ID3D11Device for the iGPU and the
// virtual DXGIOutput from the dGPU). Because the GPU preference is once-per-process,
// we spawn a helper tool to probe for us before we set our own GPU preference.
//
// The probe is attempted in two passes: first requiring the helper to verify that
// frames can actually be captured, then (only if that found nothing) accepting any
// preference for which output duplication alone succeeds.
//
// Returns true if a working GPU preference was found and applied (now or on an
// earlier call), false otherwise.
bool
probe_for_gpu_preference(const std::string &display_name) {
  // Remembers a successful probe so later calls don't re-run the helper.
  static bool set_gpu_preference = false;

  // If we've already been through here, there's nothing to do this time.
  if (set_gpu_preference) {
    return true;
  }

  // Strict pass first (verify_frame_capture == true). The relaxed pass is only
  // evaluated when the strict one fails, thanks to short-circuiting. Record the
  // outcome so a success is not probed again on the next call.
  set_gpu_preference = validate_and_test_gpu_preference(display_name, true) ||
                       validate_and_test_gpu_preference(display_name, false);

  return set_gpu_preference;
}

Expand Down
175 changes: 164 additions & 11 deletions tools/ddprobe.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@
#include <iostream>
#include <locale>
#include <string>
#include <wrl.h>

#include "src/utility.h"

using Microsoft::WRL::ComPtr;
using namespace std::literals;
namespace dxgi {
template <class T>
Expand Down Expand Up @@ -69,8 +71,128 @@ syncThreadDesktop() {
CloseDesktop(hDesk);
}

/**
* @brief Determines if a given frame is valid by checking if it contains any non-dark pixels.
*
* This function analyzes the provided frame to determine if it contains any pixels that exceed a specified darkness threshold.
* It iterates over all pixels in the frame, comparing each pixel's RGB values to the defined darkness threshold.
* If any pixel's RGB values exceed this threshold, the function concludes that the frame is valid (i.e., not entirely dark) and returns `true`.
* If all pixels are below or equal to the threshold, indicating a completely dark frame, the function returns `false`.

* @param mappedResource A reference to a `D3D11_MAPPED_SUBRESOURCE` structure containing the mapped subresource data of the frame to be analyzed.
* @param frameDesc A reference to a `D3D11_TEXTURE2D_DESC` structure describing the texture properties, including width and height.
* @param darknessThreshold A floating-point value representing the threshold above which a pixel's RGB values are considered dark. The value ranges from 0.0f to 1.0f, with a default value of 0.1f.
* @return Returns `true` if the frame contains any non-dark pixels, indicating it is valid; otherwise, returns `false`.
*/
bool
is_valid_frame(const D3D11_MAPPED_SUBRESOURCE &mappedResource, const D3D11_TEXTURE2D_DESC &frameDesc, float darknessThreshold = 0.1f) {
cgutman marked this conversation as resolved.
Show resolved Hide resolved
const auto *pixels = static_cast<const uint8_t *>(mappedResource.pData);
const int bytesPerPixel = 4; // (8 bits per channel, excluding alpha). Factoring HDR is not needed because it doesn't cause black levels to raise enough to be a concern.
const int stride = mappedResource.RowPitch;
const int width = frameDesc.Width;
const int height = frameDesc.Height;

// Convert the darkness threshold to an integer value for comparison
const auto threshold = static_cast<int>(darknessThreshold * 255);

// Iterate over each pixel in the frame
for (int y = 0; y < height; ++y) {
for (int x = 0; x < width; ++x) {
const uint8_t *pixel = pixels + y * stride + x * bytesPerPixel;
// Check if any RGB channel exceeds the darkness threshold
if (pixel[0] > threshold || pixel[1] > threshold || pixel[2] > threshold) {
// Frame is not dark
return true;
}
}
}
// Frame is entirely dark
return false;
}

/**
 * @brief Probes an output duplication by grabbing frames until one shows visible content.
 *
 * Up to 10 attempts are made. Each attempt acquires the next frame (500 ms timeout), copies it
 * into a CPU-readable staging texture, maps that texture and passes it to `is_valid_frame`.
 * The first frame that passes ends the probe with `S_OK`. If any acquisition, interface query,
 * texture creation or mapping call fails, the probe stops immediately and that call's `HRESULT`
 * is returned. Progress and errors are written to standard output.
 *
 * @param dup The DXGI output duplication object frames are acquired from.
 * @param device The D3D11 device used to obtain the immediate context and create the staging texture.
 * @return `S_OK` once a frame with visible content is seen, `E_FAIL` if all 10 frames were dark, or the failing call's `HRESULT`.
 */
HRESULT
test_frame_capture(dxgi::dup_t &dup, ComPtr<ID3D11Device> device) {
  // Prints "<prefix><status in hex>]" and hands the status back so it can be returned directly.
  const auto report_failure = [](std::string_view prefix, HRESULT hr) {
    std::cout << prefix << util::hex(hr).to_string_view() << ']' << std::endl;
    return hr;
  };

  for (int attempt = 1; attempt <= 10; ++attempt) {
    std::cout << "Attempting to acquire frame " << attempt << " of 10..." << std::endl;

    ComPtr<IDXGIResource> acquired;
    DXGI_OUTDUPL_FRAME_INFO info;
    ComPtr<ID3D11DeviceContext> ctx;
    ComPtr<ID3D11Texture2D> staging;

    const HRESULT acquire_status = dup->AcquireNextFrame(500, &info, &acquired);
    device->GetImmediateContext(&ctx);

    if (FAILED(acquire_status)) {
      return report_failure("Error: Failed to acquire next frame [0x"sv, acquire_status);
    }

    // From here on the acquired frame must be handed back on every exit path.
    auto release_frame = util::fail_guard([&dup]() {
      dup->ReleaseFrame();
    });

    std::cout << "Frame acquired successfully." << std::endl;

    ComPtr<ID3D11Texture2D> captured;
    if (const HRESULT hr = acquired->QueryInterface(IID_PPV_ARGS(&captured)); FAILED(hr)) {
      return report_failure("Error: Failed to query texture interface from frame resource [0x"sv, hr);
    }

    // Describe a staging copy of the captured texture that the CPU is allowed to read.
    D3D11_TEXTURE2D_DESC desc;
    captured->GetDesc(&desc);
    desc.Usage = D3D11_USAGE_STAGING;
    desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
    desc.BindFlags = 0;
    desc.MiscFlags = 0;

    if (const HRESULT hr = device->CreateTexture2D(&desc, nullptr, &staging); FAILED(hr)) {
      return report_failure("Error: Failed to create staging texture [0x"sv, hr);
    }

    ctx->CopyResource(staging.Get(), captured.Get());

    D3D11_MAPPED_SUBRESOURCE mapped;
    if (const HRESULT hr = ctx->Map(staging.Get(), 0, D3D11_MAP_READ, 0, &mapped); FAILED(hr)) {
      return report_failure("Error: Failed to map the staging texture for inspection [0x"sv, hr);
    }

    // The mapping is undone before the frame is released (guards unwind in reverse order).
    auto unmap_staging = util::fail_guard([&ctx, &staging]() {
      ctx->Unmap(staging.Get(), 0);
    });

    if (!is_valid_frame(mapped, desc)) {
      std::cout << "Frame " << attempt << " is empty (no visible content)." << std::endl;
      continue;
    }

    std::cout << "Frame " << attempt << " is non-empty (contains visible content)." << std::endl;
    return S_OK;
  }

  // Every attempt produced a dark frame, which suggests capture is not really working.
  return E_FAIL;
}

HRESULT
test_dxgi_duplication(dxgi::adapter_t &adapter, dxgi::output_t &output) {
test_dxgi_duplication(dxgi::adapter_t &adapter, dxgi::output_t &output, bool verify_frame_capture) {
D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
Expand Down Expand Up @@ -107,29 +229,60 @@ test_dxgi_duplication(dxgi::adapter_t &adapter, dxgi::output_t &output) {
// Ensure we can duplicate the current display
syncThreadDesktop();

// Return the result of DuplicateOutput() to Sunshine
// Attempt to duplicate the output
dxgi::dup_t dup;
return output1->DuplicateOutput((IUnknown *) device.get(), &dup);
ComPtr<ID3D11Device> device_ptr(device.get());
HRESULT result = output1->DuplicateOutput(device_ptr.Get(), &dup);

if (FAILED(result)) {
std::cout << "Failed to duplicate output [0x"sv << util::hex(result).to_string_view() << "]" << std::endl;
return result;
}

// To prevent false negatives, we'll make it optional to test for frame capture.
if (verify_frame_capture) {
HRESULT captureResult = test_frame_capture(dup, device_ptr.Get());
if (FAILED(captureResult)) {
std::cout << "Frame capture test failed [0x"sv << util::hex(captureResult).to_string_view() << "]" << std::endl;
return captureResult;
}
}

return S_OK;
}

int
main(int argc, char *argv[]) {
HRESULT status;

// Display name may be omitted
if (argc != 2 && argc != 3) {
std::cout << "ddprobe.exe [GPU preference value] [display name]"sv << std::endl;
// Usage message
if (argc < 2 || argc > 4) {
std::cout << "Usage: ddprobe.exe [GPU preference value] [display name] [--verify-frame-capture]"sv << std::endl;
return -1;
}

std::wstring display_name;
if (argc == 3) {
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
display_name = converter.from_bytes(argv[2]);
bool verify_frame_capture = false;

// Parse GPU preference value (required)
int gpu_preference = atoi(argv[1]);

// Parse optional arguments
for (int i = 2; i < argc; ++i) {
std::string arg = argv[i];

if (arg == "--verify-frame-capture") {
verify_frame_capture = true;
}
else {
// Assume any other argument is the display name
std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>, wchar_t> converter;
display_name = converter.from_bytes(arg);
}
}

// We must set the GPU preference before making any DXGI/D3D calls
status = set_gpu_preference(atoi(argv[1]));
status = set_gpu_preference(gpu_preference);
if (status != ERROR_SUCCESS) {
return status;
}
Expand Down Expand Up @@ -173,7 +326,7 @@ main(int argc, char *argv[]) {
}

// We found the matching output. Test it and return the result.
return test_dxgi_duplication(adapter, output);
return test_dxgi_duplication(adapter, output, verify_frame_capture);
}
}

Expand Down
Loading