Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve capture sleep scheduling on Windows #1288

Merged
merged 8 commits into from
Aug 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions docs/source/about/advanced_usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -413,27 +413,6 @@ resolutions
3840x1600,
]

dwmflush
^^^^^^^^

**Description**
Invoke DwmFlush() to sync screen capture to the Windows presentation interval.

.. Caution:: Applies to Windows only. Alleviates visual stuttering during mouse movement.
If enabled, this feature will automatically deactivate if the client framerate exceeds
the host monitor's current refresh rate.

.. Note:: If you disable this option, you may see video stuttering during mouse movement in certain scenarios.
It is recommended to leave enabled when possible.

**Default**
``enabled``

**Example**
.. code-block:: text

dwmflush = enabled

Audio
-----

Expand Down
2 changes: 0 additions & 2 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,6 @@ namespace config {
{}, // encoder
{}, // adapter_name
{}, // output_name
true // dwmflush
};

audio_t audio {
Expand Down Expand Up @@ -1034,7 +1033,6 @@ namespace config {
string_f(vars, "encoder", video.encoder);
string_f(vars, "adapter_name", video.adapter_name);
string_f(vars, "output_name", video.output_name);
bool_f(vars, "dwmflush", video.dwmflush);

path_f(vars, "pkey", nvhttp.pkey);
path_f(vars, "cert", nvhttp.cert);
Expand Down
1 change: 0 additions & 1 deletion src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ namespace config {
std::string encoder;
std::string adapter_name;
std::string output_name;
bool dwmflush;
};

struct audio_t {
Expand Down
12 changes: 9 additions & 3 deletions src/platform/windows/display.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ namespace platf::dxgi {
public:
dup_t dup;
bool has_frame {};
bool use_dwmflush {};
std::chrono::steady_clock::time_point last_protected_content_warning_time {};

capture_e
Expand All @@ -127,21 +126,28 @@ namespace platf::dxgi {
int
init(const ::video::config_t &config, const std::string &display_name);

void
high_precision_sleep(std::chrono::nanoseconds duration);

capture_e
capture(const push_captured_image_cb_t &push_captured_image_cb, const pull_free_image_cb_t &pull_free_image_cb, bool *cursor) override;

std::chrono::nanoseconds delay;

factory1_t factory;
adapter_t adapter;
output_t output;
device_t device;
device_ctx_t device_ctx;
duplication_t dup;
DXGI_RATIONAL display_refresh_rate;
int display_refresh_rate_rounded;

int client_frame_rate;

DXGI_FORMAT capture_format;
D3D_FEATURE_LEVEL feature_level;

util::safe_ptr_v2<std::remove_pointer_t<HANDLE>, BOOL, CloseHandle> timer;

typedef enum _D3DKMT_SCHEDULINGPRIORITYCLASS {
D3DKMT_SCHEDULINGPRIORITYCLASS_IDLE,
D3DKMT_SCHEDULINGPRIORITYCLASS_BELOW_NORMAL,
Expand Down
191 changes: 133 additions & 58 deletions src/platform/windows/display_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ typedef long NTSTATUS;
#include "src/config.h"
#include "src/main.h"
#include "src/platform/common.h"
#include "src/stat_trackers.h"
#include "src/video.h"

namespace platf {
Expand All @@ -32,10 +33,6 @@ namespace platf::dxgi {
return capture_status;
}

if (use_dwmflush) {
DwmFlush();
}

auto status = dup->AcquireNextFrame(timeout.count(), &frame_info, res_p);

switch (status) {
Expand Down Expand Up @@ -78,19 +75,20 @@ namespace platf::dxgi {
}

auto status = dup->ReleaseFrame();
has_frame = false;
switch (status) {
case S_OK:
has_frame = false;
return capture_e::ok;
case DXGI_ERROR_WAIT_TIMEOUT:
return capture_e::timeout;
case WAIT_ABANDONED:

case DXGI_ERROR_INVALID_CALL:
BOOST_LOG(warning) << "Duplication frame already released";
return capture_e::ok;

case DXGI_ERROR_ACCESS_LOST:
case DXGI_ERROR_ACCESS_DENIED:
has_frame = false;
return capture_e::reinit;

default:
BOOST_LOG(error) << "Couldn't release frame [0x"sv << util::hex(status).to_string_view();
BOOST_LOG(error) << "Error while releasing duplication frame [0x"sv << util::hex(status).to_string_view();
return capture_e::error;
}
}
Expand All @@ -99,24 +97,53 @@ namespace platf::dxgi {
release_frame();
}

void
display_base_t::high_precision_sleep(std::chrono::nanoseconds duration) {
if (!timer) {
BOOST_LOG(error) << "Attempting high_precision_sleep() with uninitialized timer";
return;
}
if (duration < 0s) {
BOOST_LOG(error) << "Attempting high_precision_sleep() with negative duration";
return;
}
if (duration > 5s) {
BOOST_LOG(error) << "Attempting high_precision_sleep() with unexpectedly large duration (>5s)";
return;
}

LARGE_INTEGER due_time;
due_time.QuadPart = duration.count() / -100;
SetWaitableTimer(timer.get(), &due_time, 0, nullptr, nullptr, false);
WaitForSingleObject(timer.get(), INFINITE);
}

capture_e
display_base_t::capture(const push_captured_image_cb_t &push_captured_image_cb, const pull_free_image_cb_t &pull_free_image_cb, bool *cursor) {
auto next_frame = std::chrono::steady_clock::now();

// Use CREATE_WAITABLE_TIMER_HIGH_RESOLUTION if supported (Windows 10 1809+)
HANDLE timer = CreateWaitableTimerEx(nullptr, nullptr, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS);
if (!timer) {
timer = CreateWaitableTimerEx(nullptr, nullptr, 0, TIMER_ALL_ACCESS);
if (!timer) {
auto winerr = GetLastError();
BOOST_LOG(error) << "Failed to create timer: "sv << winerr;
return capture_e::error;
auto adjust_client_frame_rate = [&]() -> DXGI_RATIONAL {
// Adjust capture frame interval when display refresh rate is not integral but very close to requested fps.
if (display_refresh_rate.Denominator > 1) {
DXGI_RATIONAL candidate = display_refresh_rate;
if (client_frame_rate % display_refresh_rate_rounded == 0) {
candidate.Numerator *= client_frame_rate / display_refresh_rate_rounded;
}
else if (display_refresh_rate_rounded % client_frame_rate == 0) {
candidate.Denominator *= display_refresh_rate_rounded / client_frame_rate;
}
double candidate_rate = (double) candidate.Numerator / candidate.Denominator;
// Can only decrease requested fps, otherwise client may start accumulating frames and suffer increased latency.
if (client_frame_rate > candidate_rate && candidate_rate / client_frame_rate > 0.99) {
BOOST_LOG(info) << "Adjusted capture rate to " << candidate_rate << "fps to better match display";
return candidate;
}
}
}

auto close_timer = util::fail_guard([timer]() {
CloseHandle(timer);
});
return { (uint32_t) client_frame_rate, 1 };
};

DXGI_RATIONAL client_frame_rate_adjusted = adjust_client_frame_rate();
std::optional<std::chrono::steady_clock::time_point> frame_pacing_group_start;
uint32_t frame_pacing_group_frames = 0;

// Keep the display awake during capture. If the display goes to sleep during
// capture, best case is that capture stops until it powers back on. However,
Expand All @@ -127,6 +154,8 @@ namespace platf::dxgi {
SetThreadExecutionState(ES_CONTINUOUS);
});

stat_trackers::min_max_avg_tracker<double> sleep_overshoot_tracker;

while (true) {
// This will return false if the HDR state changes or for any number of other
// display or GPU changes. We should reinit to examine the updated state of
Expand All @@ -135,25 +164,65 @@ namespace platf::dxgi {
return platf::capture_e::reinit;
}

// If the wait time is between 1 us and 1 second, wait the specified time
// and offset the next frame time from the exact current frame time target.
auto wait_time_us = std::chrono::duration_cast<std::chrono::microseconds>(next_frame - std::chrono::steady_clock::now()).count();
if (wait_time_us > 0 && wait_time_us < 1000000) {
LARGE_INTEGER due_time { .QuadPart = -10LL * wait_time_us };
SetWaitableTimer(timer, &due_time, 0, nullptr, nullptr, false);
WaitForSingleObject(timer, INFINITE);
next_frame += delay;
platf::capture_e status = capture_e::ok;
std::shared_ptr<img_t> img_out;

// Try to continue frame pacing group, snapshot() is called with zero timeout after waiting for client frame interval
if (frame_pacing_group_start) {
const uint32_t seconds = (uint64_t) frame_pacing_group_frames * client_frame_rate_adjusted.Denominator / client_frame_rate_adjusted.Numerator;
const uint32_t remainder = (uint64_t) frame_pacing_group_frames * client_frame_rate_adjusted.Denominator % client_frame_rate_adjusted.Numerator;
const auto sleep_target = *frame_pacing_group_start +
std::chrono::nanoseconds(1s) * seconds +
std::chrono::nanoseconds(1s) * remainder / client_frame_rate_adjusted.Numerator;
const auto sleep_period = sleep_target - std::chrono::steady_clock::now();

if (sleep_period <= 0ns) {
// We missed next frame time, invalidating current frame pacing group
frame_pacing_group_start = std::nullopt;
frame_pacing_group_frames = 0;
status = capture_e::timeout;
}
else {
high_precision_sleep(sleep_period);

if (config::sunshine.min_log_level <= 1) {
// Print sleep overshoot stats to debug log every 20 seconds
auto print_info = [&](double min_overshoot, double max_overshoot, double avg_overshoot) {
auto f = stat_trackers::one_digit_after_decimal();
BOOST_LOG(debug) << "Sleep overshoot (min/max/avg): " << f % min_overshoot << "ms/" << f % max_overshoot << "ms/" << f % avg_overshoot << "ms";
};
std::chrono::nanoseconds overshoot_ns = std::chrono::steady_clock::now() - sleep_target;
sleep_overshoot_tracker.collect_and_callback_on_interval(overshoot_ns.count() / 1000000., print_info, 20s);
}

status = snapshot(pull_free_image_cb, img_out, 0ms, *cursor);

if (status == capture_e::ok && img_out) {
frame_pacing_group_frames += 1;
}
else {
frame_pacing_group_start = std::nullopt;
frame_pacing_group_frames = 0;
}
}
}
else {
// If the wait time is negative (meaning the frame is past due) or the
// computed wait time is beyond a second (meaning possible clock issues),
// just capture the frame now and resynchronize the frame interval with
// the current time.
next_frame = std::chrono::steady_clock::now() + delay;

// Start new frame pacing group if necessary, snapshot() is called with non-zero timeout
if (status == capture_e::timeout || (status == capture_e::ok && !frame_pacing_group_start)) {
status = snapshot(pull_free_image_cb, img_out, 1000ms, *cursor);

if (status == capture_e::ok && img_out) {
frame_pacing_group_start = img_out->frame_timestamp;

if (!frame_pacing_group_start) {
BOOST_LOG(warning) << "snapshot() provided image without timestamp";
frame_pacing_group_start = std::chrono::steady_clock::now();
}

frame_pacing_group_frames = 1;
}
}

std::shared_ptr<img_t> img_out;
auto status = snapshot(pull_free_image_cb, img_out, 1000ms, *cursor);
switch (status) {
case platf::capture_e::reinit:
case platf::capture_e::error:
Expand All @@ -173,6 +242,11 @@ namespace platf::dxgi {
BOOST_LOG(error) << "Unrecognized capture status ["sv << (int) status << ']';
return status;
}

status = dup.release_frame();
if (status != platf::capture_e::ok) {
return status;
}
}

return capture_e::ok;
Expand Down Expand Up @@ -334,8 +408,6 @@ namespace platf::dxgi {
// Ensure we can duplicate the current display
syncThreadDesktop();

delay = std::chrono::nanoseconds { 1s } / config.framerate;

// Get rectangle of full desktop for absolute mouse coordinates
env_width = GetSystemMetrics(SM_CXVIRTUALSCREEN);
env_height = GetSystemMetrics(SM_CYVIRTUALSCREEN);
Expand Down Expand Up @@ -470,21 +542,6 @@ namespace platf::dxgi {
<< "Offset : "sv << offset_x << 'x' << offset_y << std::endl
<< "Virtual Desktop : "sv << env_width << 'x' << env_height;

// Enable DwmFlush() only if the current refresh rate can match the client framerate.
auto refresh_rate = config.framerate;
DWM_TIMING_INFO timing_info;
timing_info.cbSize = sizeof(timing_info);

status = DwmGetCompositionTimingInfo(NULL, &timing_info);
if (FAILED(status)) {
BOOST_LOG(warning) << "Failed to detect active refresh rate.";
}
else {
refresh_rate = std::round((double) timing_info.rateRefresh.uiNumerator / (double) timing_info.rateRefresh.uiDenominator);
}

dup.use_dwmflush = config::video.dwmflush && !(config.framerate > refresh_rate) ? true : false;

// Bump up thread priority
{
const DWORD flags = TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY;
Expand Down Expand Up @@ -610,6 +667,13 @@ namespace platf::dxgi {
BOOST_LOG(info) << "Desktop resolution ["sv << dup_desc.ModeDesc.Width << 'x' << dup_desc.ModeDesc.Height << ']';
BOOST_LOG(info) << "Desktop format ["sv << dxgi_format_to_string(dup_desc.ModeDesc.Format) << ']';

display_refresh_rate = dup_desc.ModeDesc.RefreshRate;
display_refresh_rate_rounded = lround((double) display_refresh_rate.Numerator / display_refresh_rate.Denominator);
BOOST_LOG(info) << "Display refresh rate [" << display_refresh_rate_rounded << "Hz]";

client_frame_rate = config.framerate;
BOOST_LOG(info) << "Requested frame rate [" << client_frame_rate << "fps]";

dxgi::output6_t output6 {};
status = output->QueryInterface(IID_IDXGIOutput6, (void **) &output6);
if (SUCCEEDED(status)) {
Expand All @@ -632,6 +696,17 @@ namespace platf::dxgi {
// Capture format will be determined from the first call to AcquireNextFrame()
capture_format = DXGI_FORMAT_UNKNOWN;

// Use CREATE_WAITABLE_TIMER_HIGH_RESOLUTION if supported (Windows 10 1809+)
timer.reset(CreateWaitableTimerEx(nullptr, nullptr, CREATE_WAITABLE_TIMER_HIGH_RESOLUTION, TIMER_ALL_ACCESS));
if (!timer) {
timer.reset(CreateWaitableTimerEx(nullptr, nullptr, 0, TIMER_ALL_ACCESS));
if (!timer) {
auto winerr = GetLastError();
BOOST_LOG(error) << "Failed to create timer: "sv << winerr;
return -1;
}
}

return 0;
}

Expand Down
2 changes: 1 addition & 1 deletion src/platform/windows/display_vram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -957,7 +957,7 @@ namespace platf::dxgi {
}

const bool mouse_update_flag = frame_info.LastMouseUpdateTime.QuadPart != 0 || frame_info.PointerShapeBufferSize > 0;
const bool frame_update_flag = frame_info.AccumulatedFrames != 0 || frame_info.LastPresentTime.QuadPart != 0;
const bool frame_update_flag = frame_info.LastPresentTime.QuadPart != 0;
const bool update_flag = mouse_update_flag || frame_update_flag;

if (!update_flag) {
Expand Down
2 changes: 1 addition & 1 deletion src/stat_trackers.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ namespace stat_trackers {
// Accumulator state for the tracker.
// NOTE(review): the code that updates and resets these fields is outside this view;
// the initial values below are presumably chosen so the first collected sample replaces them.
struct {
  std::chrono::steady_clock::time_point last_callback_time = std::chrono::steady_clock::now();
  T stat_min = std::numeric_limits<T>::max();
  // Use lowest(), not min(): for floating-point T, min() is the smallest *positive*
  // normal value, so a series of only negative samples would never update the maximum.
  T stat_max = std::numeric_limits<T>::lowest();
  double stat_total = 0;
  uint32_t calls = 0;
} data;
Expand Down
Loading