Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nvidia Reflex support #4632

Merged
merged 22 commits into from
Jan 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b446d0a
[util] Add parameter to initialize small_vector with a given size
doitsujin Jan 20, 2025
1998c6a
[util] Expose frame rate environment variable
doitsujin Jan 17, 2025
9a0057f
[dxvk] Add latency tracker
doitsujin Jan 17, 2025
26ff3d9
[dxvk] Add latency tracker to queue submissions
doitsujin Jan 17, 2025
fa6c326
[dxvk] Add latency tracker to presenter
doitsujin Jan 17, 2025
1dee62e
[dxvk] Add latency tracker to context
doitsujin Jan 17, 2025
7c88807
[hud] Add frame latency item
doitsujin Jan 17, 2025
ac88685
[d3d11] Implement latency tracking
doitsujin Jan 17, 2025
970378a
[dxgi] Pass display refresh in windowed mode if latency sleep is enabled
doitsujin Jan 19, 2025
a537ecf
[d3d9] Implement latency tracking
doitsujin Jan 17, 2025
1eb7da5
[d3d9] Always limit to display refresh in low latency mode
doitsujin Jan 20, 2025
7f38314
[dxvk] Enable VK_NV_low_latency2 if supported
doitsujin Jan 16, 2025
d09cef5
[dxvk] Add option to disable VK_NV_low_latency2
doitsujin Jan 19, 2025
f4dc269
[dxvk] Implement NV_low_latency2 functionality in presenter
doitsujin Jan 19, 2025
00fc4af
[dxvk] Pass present IDs to command submissions as necessary
doitsujin Jan 19, 2025
d1c33c3
[dxvk] Add NV_low_latency2 path to latency tracker
doitsujin Jan 21, 2025
95e2635
[dxvk] Implement Reflex latency tracker
doitsujin Jan 20, 2025
b686d95
[dxvk] Refactor CS chunk queues
doitsujin Jan 20, 2025
75617db
[d3d11] Add queue parameter to CS chunk injection
doitsujin Jan 20, 2025
d8f4ce1
[d3d11] Add ID3DLowLatencyDevice definitions
doitsujin Jan 16, 2025
3eb96fb
[d3d11] Stub out ID3DLowLatencyDevice for D3D11
doitsujin Jan 16, 2025
efcab86
[d3d11] Implement ID3DLowLatencyDevice
doitsujin Jan 20, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions dxvk.conf
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,46 @@
# d3d9.maxFrameRate = 0


# Controls latency sleep and Nvidia Reflex support.
#
# Supported values:
# - Auto: By default, DXVK only supports latency sleep in D3D11 games that
# use Reflex if the graphics driver supports VK_NV_low_latency2,
# and if dxvk-nvapi is enabled in Proton.
# - True: Enables built-in latency reduction based on internal timings.
# This assumes that input sampling for any given frame happens after
# the D3D9 or DXGI Present call returns; games that render and present
# asynchronously will not behave as intended.
# Similarly, this will not have any effect in games with built-in frame
# rate limiters, or if an external limiter (such as MangoHud) is used.
# In some games, enabling this may reduce performance or lead to less
# consistent frame pacing.
# The implementation will either use VK_NV_low_latency2 if supported
# by the driver, or a custom algorithm.
# - False: Disables Reflex support as well as built-in latency reduction.

# dxvk.latencySleep = Auto


# Tolerance for the latency sleep heuristic, in microseconds. Higher values
# increase latency, but may lead to better frame pacing in some cases. Does
# not have any effect if VK_NV_low_latency2 is used.
#
# Supported values: Any non-negative number

# dxvk.latencyTolerance = 1000


# Disables the use of VK_NV_low_latency2. This will make Reflex unavailable
# in games, and if dxvk.latencySleep is set to True, a custom algorithm will
# be used for latency control. By default, the extension will not be used in
# 32-bit applications due to driver issues.
#
# Supported values: Auto, True, False

# dxvk.disableNvLowLatency2 = Auto


# Override PCI vendor and device IDs reported to the application. Can
# cause the app to adjust behaviour depending on the selected values.
#
Expand Down
2 changes: 1 addition & 1 deletion src/d3d11/d3d11_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ namespace dxvk {

void D3D11Buffer::SetDebugName(const char* pName) {
if (m_buffer) {
m_parent->GetContext()->InjectCs([
m_parent->GetContext()->InjectCs(DxvkCsQueue::HighPriority, [
cBuffer = m_buffer,
cName = std::string(pName ? pName : "")
] (DxvkContext* ctx) {
Expand Down
13 changes: 10 additions & 3 deletions src/d3d11/d3d11_context_imm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -861,11 +861,17 @@ namespace dxvk {
}


void D3D11ImmediateContext::EndFrame() {
void D3D11ImmediateContext::EndFrame(
Rc<DxvkLatencyTracker> LatencyTracker) {
D3D10DeviceLock lock = LockContext();

EmitCs<false>([] (DxvkContext* ctx) {
EmitCs<false>([
cTracker = std::move(LatencyTracker)
] (DxvkContext* ctx) {
ctx->endFrame();

if (cTracker && cTracker->needsAutoMarkers())
ctx->endLatencyTracking(cTracker);
});
}

Expand Down Expand Up @@ -914,11 +920,12 @@ namespace dxvk {


void D3D11ImmediateContext::InjectCsChunk(
DxvkCsQueue Queue,
DxvkCsChunkRef&& Chunk,
bool Synchronize) {
// Do not update the sequence number when emitting a chunk
// from an external source since that would break tracking
m_csThread.injectChunk(std::move(Chunk), Synchronize);
m_csThread.injectChunk(Queue, std::move(Chunk), Synchronize);
}


Expand Down
7 changes: 5 additions & 2 deletions src/d3d11/d3d11_context_imm.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,18 @@ namespace dxvk {
}

void InjectCsChunk(
DxvkCsQueue Queue,
DxvkCsChunkRef&& Chunk,
bool Synchronize);

template<typename Fn>
void InjectCs(
DxvkCsQueue Queue,
Fn&& Command) {
auto chunk = AllocCsChunk();
chunk->push(std::move(Command));

InjectCsChunk(std::move(chunk), false);
InjectCsChunk(Queue, std::move(chunk), false);
}

private:
Expand Down Expand Up @@ -169,7 +171,8 @@ namespace dxvk {

void SynchronizeDevice();

void EndFrame();
void EndFrame(
Rc<DxvkLatencyTracker> LatencyTracker);

bool WaitForResource(
const DxvkPagedResource& Resource,
Expand Down
202 changes: 200 additions & 2 deletions src/d3d11/d3d11_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2828,7 +2828,7 @@ namespace dxvk {
feedback = ctx->ensureImageCompatibility(cImage, usageInfo);
});

m_device->GetContext()->InjectCsChunk(std::move(chunk), true);
m_device->GetContext()->InjectCsChunk(DxvkCsQueue::HighPriority, std::move(chunk), true);

if (!feedback) {
Logger::err(str::format("Failed to lock image:"
Expand All @@ -2852,7 +2852,7 @@ namespace dxvk {
ctx->ensureBufferAddress(cBuffer);
});

m_device->GetContext()->InjectCsChunk(std::move(chunk), true);
m_device->GetContext()->InjectCsChunk(DxvkCsQueue::HighPriority, std::move(chunk), true);
}


Expand Down Expand Up @@ -3058,6 +3058,198 @@ namespace dxvk {



// Stores the container and device pointers and decides whether Reflex
// is exposed: the DXVK device must report the nvLowLatency2 feature,
// and the latencySleep config option must be left at Auto.
D3D11ReflexDevice::D3D11ReflexDevice(
        D3D11DXGIDevice* pContainer,
        D3D11Device* pDevice)
: m_container(pContainer), m_device(pDevice) {
  auto device = pDevice->GetDXVKDevice();

  if (!device->features().nvLowLatency2)
    m_reflexEnabled = false;
  else
    m_reflexEnabled = device->config().latencySleep == Tristate::Auto;
}


// Nothing to release explicitly; members clean up after themselves.
D3D11ReflexDevice::~D3D11ReflexDevice() = default;


// Forwards the call to the container passed in at construction
// and returns whatever count the container reports.
ULONG STDMETHODCALLTYPE D3D11ReflexDevice::AddRef() {
  const ULONG refCount = m_container->AddRef();
  return refCount;
}


// Forwards the call to the container passed in at construction
// and returns whatever count the container reports.
ULONG STDMETHODCALLTYPE D3D11ReflexDevice::Release() {
  const ULONG refCount = m_container->Release();
  return refCount;
}


// Interface lookup is delegated entirely to the container object;
// its result is returned unchanged.
HRESULT STDMETHODCALLTYPE D3D11ReflexDevice::QueryInterface(
        REFIID riid,
        void** ppvObject) {
  const HRESULT hr = m_container->QueryInterface(riid, ppvObject);
  return hr;
}


// Reports the enable flag that was computed in the constructor.
BOOL STDMETHODCALLTYPE D3D11ReflexDevice::SupportsLowLatency() {
  return static_cast<BOOL>(m_reflexEnabled);
}


// Performs a latency sleep on the currently registered tracker, if any.
// Returns DXGI_ERROR_INVALID_CALL when Reflex is not enabled, S_OK otherwise
// (including when no tracker has been registered).
HRESULT STDMETHODCALLTYPE D3D11ReflexDevice::LatencySleep() {
  if (!m_reflexEnabled)
    return DXGI_ERROR_INVALID_CALL;

  // Take a reference to the tracker under the lock, then release the
  // lock before sleeping so the object is not kept locked meanwhile.
  Rc<DxvkReflexLatencyTrackerNv> activeTracker = [this] {
    std::lock_guard lock(m_mutex);
    return m_tracker;
  }();

  if (activeTracker)
    activeTracker->latencySleep();

  return S_OK;
}


// Applies the requested sleep mode to the registered tracker (if there is
// one) and remembers the parameters on this object.
// Returns DXGI_ERROR_INVALID_CALL when Reflex is not enabled, S_OK otherwise.
HRESULT STDMETHODCALLTYPE D3D11ReflexDevice::SetLatencySleepMode(
        BOOL LowLatencyEnable,
        BOOL LowLatencyBoost,
        UINT32 MinIntervalUs) {
  if (m_reflexEnabled) {
    std::lock_guard lock(m_mutex);

    if (m_tracker)
      m_tracker->setLatencySleepMode(LowLatencyEnable, LowLatencyBoost, MinIntervalUs);

    // Keep a copy of the settings in case no swapchain exists yet;
    // RegisterLatencyTracker hands them to the tracker later on.
    m_enableLowLatency = LowLatencyEnable;
    m_enableBoost = LowLatencyBoost;
    m_minIntervalUs = MinIntervalUs;
    return S_OK;
  }

  return DXGI_ERROR_INVALID_CALL;
}


// Forwards an application latency marker to the registered tracker.
// Render-submit start/end markers additionally inject a command into the
// ordered CS queue that begins or ends latency tracking on the context.
// Returns DXGI_ERROR_INVALID_CALL when Reflex is not enabled, S_OK otherwise
// (including when no tracker has been registered).
HRESULT STDMETHODCALLTYPE D3D11ReflexDevice::SetLatencyMarker(
        UINT64 FrameId,
        UINT32 MarkerType) {
  if (!m_reflexEnabled)
    return DXGI_ERROR_INVALID_CALL;

  std::lock_guard lock(m_mutex);

  // Without a tracker there is nothing to record
  if (!m_tracker)
    return S_OK;

  auto markerKind = VkLatencyMarkerNV(MarkerType);
  m_tracker->setLatencyMarker(FrameId, markerKind);

  switch (markerKind) {
    case VK_LATENCY_MARKER_RENDERSUBMIT_START_NV: {
      m_device->GetContext()->InjectCs(DxvkCsQueue::Ordered, [
        cTracker = m_tracker,
        cFrameId = FrameId
      ] (DxvkContext* ctx) {
        // Translate the app-provided frame ID; a result of zero
        // means tracking is not started for this frame.
        uint64_t frameId = cTracker->frameIdFromAppFrameId(cFrameId);

        if (frameId)
          ctx->beginLatencyTracking(cTracker, frameId);
      });
    } break;

    case VK_LATENCY_MARKER_RENDERSUBMIT_END_NV: {
      m_device->GetContext()->InjectCs(DxvkCsQueue::Ordered, [
        cTracker = m_tracker
      ] (DxvkContext* ctx) {
        ctx->endLatencyTracking(cTracker);
      });
    } break;

    default:
      break;
  }

  return S_OK;
}


// Fills pLowLatencyResults with per-frame latency reports from the tracker.
//
// The output frame reports are always reset first. Returns E_INVALIDARG for
// a null output pointer and DXGI_ERROR_INVALID_CALL when Reflex is not
// enabled. Returns S_OK otherwise; the reports stay zeroed if no tracker is
// registered or fewer than 64 reports are available.
HRESULT STDMETHODCALLTYPE D3D11ReflexDevice::GetLatencyInfo(
        D3D_LOW_LATENCY_RESULTS* pLowLatencyResults) {
  constexpr static size_t kFrameCount = 64;

  if (!pLowLatencyResults)
    return E_INVALIDARG;

  // Clear the output before any further validation
  for (size_t idx = 0; idx < kFrameCount; idx++)
    pLowLatencyResults->frameReports[idx] = D3D_LOW_LATENCY_FRAME_REPORT();

  if (!m_reflexEnabled)
    return DXGI_ERROR_INVALID_CALL;

  std::lock_guard lock(m_mutex);

  if (!m_tracker)
    return S_OK;

  // It seems to be all-or-nothing: either all 64 frames
  // get reported, or the output is left empty.
  std::array<DxvkReflexFrameReport, kFrameCount> frames = { };
  uint32_t available = m_tracker->getFrameReports(kFrameCount, frames.data());

  if (available < kFrameCount)
    return S_OK;

  for (size_t idx = 0; idx < kFrameCount; idx++) {
    auto& timings = frames[idx].report;
    auto& out = pLowLatencyResults->frameReports[idx];

    out.frameID = timings.presentID;
    out.inputSampleTime = timings.inputSampleTimeUs;
    out.simStartTime = timings.simStartTimeUs;
    out.simEndTime = timings.simEndTimeUs;
    out.renderSubmitStartTime = timings.renderSubmitStartTimeUs;
    out.renderSubmitEndTime = timings.renderSubmitEndTimeUs;
    out.presentStartTime = timings.presentStartTimeUs;
    out.presentEndTime = timings.presentEndTimeUs;
    out.driverStartTime = timings.driverStartTimeUs;
    out.driverEndTime = timings.driverEndTimeUs;
    out.osRenderQueueStartTime = timings.osRenderQueueStartTimeUs;
    out.osRenderQueueEndTime = timings.osRenderQueueEndTimeUs;
    out.gpuRenderStartTime = timings.gpuRenderStartTimeUs;
    out.gpuRenderEndTime = timings.gpuRenderEndTimeUs;
    out.gpuActiveRenderTimeUs = frames[idx].gpuActiveTimeUs;
    out.gpuFrameTimeUs = 0;

    // Frame time is the difference between this frame's GPU render end
    // and the previous report's; the first entry has no predecessor.
    if (idx > 0) {
      auto& previous = frames[idx - 1].report;

      out.gpuFrameTimeUs = timings.gpuRenderEndTimeUs
                         - previous.gpuRenderEndTimeUs;
    }
  }

  return S_OK;
}


// Registers a latency tracker unless one is already set. Only trackers
// that are DxvkReflexLatencyTrackerNv instances are accepted; on success
// the tracker receives the sleep mode settings stored on this object.
void D3D11ReflexDevice::RegisterLatencyTracker(
        Rc<DxvkLatencyTracker> Tracker) {
  std::lock_guard lock(m_mutex);

  if (!m_tracker) {
    // Yields null if the tracker is of a different type
    m_tracker = dynamic_cast<DxvkReflexLatencyTrackerNv*>(Tracker.ptr());

    if (m_tracker)
      m_tracker->setLatencySleepMode(m_enableLowLatency, m_enableBoost, m_minIntervalUs);
  }
}


// Drops the registered tracker, but only if it is the one passed in;
// a different tracker leaves the registration untouched.
void D3D11ReflexDevice::UnregisterLatencyTracker(
        Rc<DxvkLatencyTracker> Tracker) {
  std::lock_guard lock(m_mutex);

  const bool isRegistered = (m_tracker == Tracker);

  if (isRegistered)
    m_tracker = nullptr;
}




DXGIVkSwapChainFactory::DXGIVkSwapChainFactory(
D3D11DXGIDevice* pContainer,
D3D11Device* pDevice)
Expand Down Expand Up @@ -3159,6 +3351,7 @@ namespace dxvk {
m_d3d11DeviceExt(this, &m_d3d11Device),
m_d3d11Interop (this, &m_d3d11Device),
m_d3d11Video (this, &m_d3d11Device),
m_d3d11Reflex (this, &m_d3d11Device),
m_d3d11on12 (this, &m_d3d11Device, pD3D12Device, pD3D12Queue),
m_metaDevice (this),
m_dxvkFactory (this, &m_d3d11Device) {
Expand Down Expand Up @@ -3231,6 +3424,11 @@ namespace dxvk {
return S_OK;
}

if (riid == __uuidof(ID3DLowLatencyDevice)) {
*ppvObject = ref(&m_d3d11Reflex);
return S_OK;
}

if (m_d3d11on12.Is11on12Device()) {
if (riid == __uuidof(ID3D11On12Device)) {
*ppvObject = ref(&m_d3d11on12);
Expand Down
Loading
Loading