From d396f01fcc0e240ebb5e4f9f97dcf3aa628a78a6 Mon Sep 17 00:00:00 2001 From: ns6089 <61738816+ns6089@users.noreply.github.com> Date: Wed, 26 Jun 2024 12:22:56 +0300 Subject: [PATCH] Initial implementation of yuv444in420 encoding --- src/nvenc/nvenc_base.cpp | 17 +- src/nvhttp.cpp | 15 ++ src/platform/common.h | 25 +- src/platform/linux/cuda.cpp | 7 +- src/platform/linux/kmsgrab.cpp | 14 +- src/platform/linux/wlgrab.cpp | 14 +- src/platform/linux/x11grab.cpp | 7 +- src/platform/macos/display.mm | 7 +- src/platform/windows/display.h | 11 +- src/platform/windows/display_ram.cpp | 6 +- src/platform/windows/display_vram.cpp | 228 ++++++++++++++---- src/video.cpp | 70 +++++- src/video.h | 2 +- .../convert_yuv444in420_nv12_uv_ps.hlsl | 4 + ...convert_yuv444in420_nv12_uv_ps_linear.hlsl | 4 + .../convert_yuv444in420_nv12_y_ps.hlsl | 3 + .../convert_yuv444in420_nv12_y_ps_linear.hlsl | 3 + .../convert_yuv444in420_p010_uv_ps.hlsl | 5 + ...convert_yuv444in420_p010_uv_ps_linear.hlsl | 5 + ...in420_p010_uv_ps_perceptual_quantizer.hlsl | 5 + .../convert_yuv444in420_p010_y_ps.hlsl | 4 + .../convert_yuv444in420_p010_y_ps_linear.hlsl | 4 + ...4in420_p010_y_ps_perceptual_quantizer.hlsl | 4 + .../directx/convert_yuv444in420_uv_vs.hlsl | 32 +++ .../directx/convert_yuv444in420_y_vs.hlsl | 33 +++ .../shaders/directx/include/base_vs.hlsl | 12 + .../directx/include/base_vs_types.hlsl | 2 +- .../include/convert_yuv444in420_ps_base.hlsl | 45 ++++ third-party/moonlight-common-c | 2 +- 29 files changed, 508 insertions(+), 82 deletions(-) create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps_linear.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps_linear.hlsl create mode 100644 
src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_linear.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_perceptual_quantizer.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_linear.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_perceptual_quantizer.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_uv_vs.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/convert_yuv444in420_y_vs.hlsl create mode 100644 src_assets/windows/assets/shaders/directx/include/convert_yuv444in420_ps_base.hlsl diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp index b69d6f26bd6..6bf879999e7 100644 --- a/src/nvenc/nvenc_base.cpp +++ b/src/nvenc/nvenc_base.cpp @@ -104,8 +104,16 @@ namespace nvenc { if (encoder) destroy_encoder(); auto fail_guard = util::fail_guard([this] { destroy_encoder(); }); - encoder_params.width = client_config.width; - encoder_params.height = client_config.height; + if (client_config.chromaSamplingType == 2) { + // YUV 4:4:4 recombined into YUV 4:2:0 + auto recombined_dimensions = video::calculate_yuv444in420_dimensions(client_config.width, client_config.height); + encoder_params.width = recombined_dimensions.width; + encoder_params.height = recombined_dimensions.height; + } + else { + encoder_params.width = client_config.width; + encoder_params.height = client_config.height; + } encoder_params.buffer_format = buffer_format; encoder_params.rfi = true; @@ -288,7 +296,7 @@ namespace nvenc { vui_config.colourPrimaries = colorspace.primaries; vui_config.transferCharacteristics = colorspace.tranfer_function; vui_config.colourMatrix = colorspace.matrix; - 
vui_config.chromaSampleLocationFlag = buffer_is_yuv444() ? 0 : 1; + vui_config.chromaSampleLocationFlag = (client_config.chromaSamplingType == 0) ? 1 : 0; vui_config.chromaSampleLocationTop = 0; vui_config.chromaSampleLocationBot = 0; }; @@ -341,7 +349,7 @@ namespace nvenc { format_config.transferCharacteristics = colorspace.tranfer_function; format_config.matrixCoefficients = colorspace.matrix; format_config.colorRange = colorspace.full_range; - format_config.chromaSamplePosition = buffer_is_yuv444() ? 0 : 1; + format_config.chromaSamplePosition = (client_config.chromaSamplingType == 0) ? 1 : 0; set_ref_frames(format_config.maxNumRefFramesInDPB, format_config.numFwdRefs, 8); set_minqp_if_enabled(config.min_qp_av1); @@ -395,6 +403,7 @@ namespace nvenc { std::string extra; if (init_params.enableEncodeAsync) extra += " async"; if (buffer_is_yuv444()) extra += " yuv444"; + if (client_config.chromaSamplingType == 2) extra += " yuv444in420"; if (buffer_is_10bit()) extra += " 10-bit"; if (enc_config.rcParams.multiPass != NV_ENC_MULTI_PASS_DISABLED) extra += " two-pass"; if (config.vbv_percentage_increase > 0 && get_encoder_cap(NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE)) extra += " vbv+" + std::to_string(config.vbv_percentage_increase); diff --git a/src/nvhttp.cpp b/src/nvhttp.cpp index 8ac567970eb..72e01ab45e6 100644 --- a/src/nvhttp.cpp +++ b/src/nvhttp.cpp @@ -722,30 +722,45 @@ namespace nvhttp { uint32_t codec_mode_flags = SCM_H264; if (video::last_encoder_probe_supported_yuv444_for_codec[0]) { codec_mode_flags |= SCM_H264_HIGH8_444; + if (video::last_encoder_probe_supported_yuv444in420) { + codec_mode_flags |= SCM_H264_HIGH8_444IN420; + } } if (video::active_hevc_mode >= 2) { codec_mode_flags |= SCM_HEVC; if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { codec_mode_flags |= SCM_HEVC_REXT8_444; } + if (video::last_encoder_probe_supported_yuv444in420) { + codec_mode_flags |= SCM_HEVC_MAIN8_444IN420; + } } if (video::active_hevc_mode >= 3) { codec_mode_flags 
|= SCM_HEVC_MAIN10; if (video::last_encoder_probe_supported_yuv444_for_codec[1]) { codec_mode_flags |= SCM_HEVC_REXT10_444; } + if (video::last_encoder_probe_supported_yuv444in420) { + codec_mode_flags |= SCM_HEVC_MAIN10_444IN420; + } } if (video::active_av1_mode >= 2) { codec_mode_flags |= SCM_AV1_MAIN8; if (video::last_encoder_probe_supported_yuv444_for_codec[2]) { codec_mode_flags |= SCM_AV1_HIGH8_444; } + if (video::last_encoder_probe_supported_yuv444in420) { + codec_mode_flags |= SCM_AV1_MAIN8_444IN420; + } } if (video::active_av1_mode >= 3) { codec_mode_flags |= SCM_AV1_MAIN10; if (video::last_encoder_probe_supported_yuv444_for_codec[2]) { codec_mode_flags |= SCM_AV1_HIGH10_444; } + if (video::last_encoder_probe_supported_yuv444in420) { + codec_mode_flags |= SCM_AV1_MAIN10_444IN420; + } } tree.put("root.ServerCodecModeSupport", codec_mode_flags); diff --git a/src/platform/common.h b/src/platform/common.h index 5009c18335a..d3bd794a112 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -483,13 +483,25 @@ namespace platf { virtual int dummy_img(img_t *img) = 0; + /** + * @brief Create AVCodec encode device. + * @param pix_fmt Surface format of the encoder. + * @param yuv444in420 Whether YUV 4:4:4 recombination into YUV 4:2:0 must be performed. + * @return `unique_ptr` with `avcodec_encode_device_t` implementation on success, `nullptr` on failure. + */ virtual std::unique_ptr - make_avcodec_encode_device(pix_fmt_e pix_fmt) { + make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) { return nullptr; } + /** + * @brief Create NVENC encode device. + * @param pix_fmt Surface format of the encoder. + * @param yuv444in420 Whether YUV 4:4:4 recombination into YUV 4:2:0 must be performed. + * @return `unique_ptr` with `nvenc_encode_device_t` implementation on success, `nullptr` on failure. 
+ */ virtual std::unique_ptr - make_nvenc_encode_device(pix_fmt_e pix_fmt) { + make_nvenc_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) { return nullptr; } @@ -515,6 +527,15 @@ namespace platf { return true; } + /** + * @brief Check if YUV 4:4:4 recombination into YUV 4:2:0 is supported by the display device. + * @return `true` if supported, `false` otherwise. + */ + virtual bool + is_yuv444in420_supported() { + return false; + } + virtual ~display_t() = default; // Offsets for when streaming a specific monitor. By default, they are 0. diff --git a/src/platform/linux/cuda.cpp b/src/platform/linux/cuda.cpp index 5498d9a81d5..00656d36eaf 100644 --- a/src/platform/linux/cuda.cpp +++ b/src/platform/linux/cuda.cpp @@ -961,7 +961,12 @@ namespace cuda { } std::unique_ptr - make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) { + make_avcodec_encode_device(platf::pix_fmt_e pix_fmt, bool yuv444in420) { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported"; + return nullptr; + } + return ::cuda::make_avcodec_encode_device(width, height, true); } diff --git a/src/platform/linux/kmsgrab.cpp b/src/platform/linux/kmsgrab.cpp index 5b77d6064c1..1b6fa549579 100644 --- a/src/platform/linux/kmsgrab.cpp +++ b/src/platform/linux/kmsgrab.cpp @@ -1236,7 +1236,12 @@ namespace platf { } std::unique_ptr - make_avcodec_encode_device(pix_fmt_e pix_fmt) override { + make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) override { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported"; + return nullptr; + } + #ifdef SUNSHINE_BUILD_VAAPI if (mem_type == mem_type_e::vaapi) { return va::make_avcodec_encode_device(width, height, false); @@ -1373,7 +1378,12 @@ namespace platf { display_t(mem_type) {} std::unique_ptr - make_avcodec_encode_device(pix_fmt_e pix_fmt) override { + make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) override { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not 
supported"; + return nullptr; + } + #ifdef SUNSHINE_BUILD_VAAPI if (mem_type == mem_type_e::vaapi) { return va::make_avcodec_encode_device(width, height, dup(card.render_fd.el), img_offset_x, img_offset_y, true); diff --git a/src/platform/linux/wlgrab.cpp b/src/platform/linux/wlgrab.cpp index 4c5946c84d3..34e92b43b2f 100644 --- a/src/platform/linux/wlgrab.cpp +++ b/src/platform/linux/wlgrab.cpp @@ -226,7 +226,12 @@ namespace wl { } std::unique_ptr - make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override { + make_avcodec_encode_device(platf::pix_fmt_e pix_fmt, bool yuv444in420) override { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported"; + return nullptr; + } + #ifdef SUNSHINE_BUILD_VAAPI if (mem_type == platf::mem_type_e::vaapi) { return va::make_avcodec_encode_device(width, height, false); @@ -349,7 +354,12 @@ namespace wl { } std::unique_ptr - make_avcodec_encode_device(platf::pix_fmt_e pix_fmt) override { + make_avcodec_encode_device(platf::pix_fmt_e pix_fmt, bool yuv444in420) override { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported"; + return nullptr; + } + #ifdef SUNSHINE_BUILD_VAAPI if (mem_type == platf::mem_type_e::vaapi) { return va::make_avcodec_encode_device(width, height, 0, 0, true); diff --git a/src/platform/linux/x11grab.cpp b/src/platform/linux/x11grab.cpp index bcb2ff306f7..c6fda6239f6 100644 --- a/src/platform/linux/x11grab.cpp +++ b/src/platform/linux/x11grab.cpp @@ -561,7 +561,12 @@ namespace platf { } std::unique_ptr - make_avcodec_encode_device(pix_fmt_e pix_fmt) override { + make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) override { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported"; + return nullptr; + } + #ifdef SUNSHINE_BUILD_VAAPI if (mem_type == mem_type_e::vaapi) { return va::make_avcodec_encode_device(width, height, false); diff --git a/src/platform/macos/display.mm b/src/platform/macos/display.mm index 
093e8c17f2b..df2ea89c76f 100644 --- a/src/platform/macos/display.mm +++ b/src/platform/macos/display.mm @@ -79,7 +79,12 @@ } std::unique_ptr - make_avcodec_encode_device(pix_fmt_e pix_fmt) override { + make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported"; + return nullptr; + } + if (pix_fmt == pix_fmt_e::yuv420p) { av_capture.pixelFormat = kCVPixelFormatType_32BGRA; diff --git a/src/platform/windows/display.h b/src/platform/windows/display.h index 3e035490394..a35f1946b9a 100644 --- a/src/platform/windows/display.h +++ b/src/platform/windows/display.h @@ -272,7 +272,7 @@ namespace platf::dxgi { get_supported_capture_formats() override; std::unique_ptr - make_avcodec_encode_device(pix_fmt_e pix_fmt) override; + make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) override; D3D11_MAPPED_SUBRESOURCE img_info; texture2d_t texture; @@ -295,11 +295,16 @@ namespace platf::dxgi { bool is_codec_supported(std::string_view name, const ::video::config_t &config) override; + bool + is_yuv444in420_supported() override { + return true; + } + std::unique_ptr - make_avcodec_encode_device(pix_fmt_e pix_fmt) override; + make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) override; std::unique_ptr - make_nvenc_encode_device(pix_fmt_e pix_fmt) override; + make_nvenc_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) override; std::atomic next_image_id; }; diff --git a/src/platform/windows/display_ram.cpp b/src/platform/windows/display_ram.cpp index 1105c674c30..70c7739f588 100644 --- a/src/platform/windows/display_ram.cpp +++ b/src/platform/windows/display_ram.cpp @@ -398,7 +398,11 @@ namespace platf::dxgi { } std::unique_ptr - display_ram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) { + display_ram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) { + if (yuv444in420) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported"; + return 
nullptr; + } return std::make_unique(); } diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 96ddff84258..3c8d7a76d64 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -121,6 +121,18 @@ namespace platf::dxgi { blob_t convert_yuv444_packed_y410_ps_linear_hlsl; blob_t convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl; blob_t convert_yuv444_planar_vs_hlsl; + blob_t convert_yuv444in420_nv12_uv_ps_hlsl; + blob_t convert_yuv444in420_nv12_uv_ps_linear_hlsl; + blob_t convert_yuv444in420_nv12_y_ps_hlsl; + blob_t convert_yuv444in420_nv12_y_ps_linear_hlsl; + blob_t convert_yuv444in420_p010_uv_ps_hlsl; + blob_t convert_yuv444in420_p010_uv_ps_linear_hlsl; + blob_t convert_yuv444in420_p010_uv_ps_perceptual_quantizer_hlsl; + blob_t convert_yuv444in420_p010_y_ps_hlsl; + blob_t convert_yuv444in420_p010_y_ps_linear_hlsl; + blob_t convert_yuv444in420_p010_y_ps_perceptual_quantizer_hlsl; + blob_t convert_yuv444in420_uv_vs_hlsl; + blob_t convert_yuv444in420_y_vs_hlsl; blob_t cursor_ps_hlsl; blob_t cursor_ps_normalize_white_hlsl; blob_t cursor_vs_hlsl; @@ -413,14 +425,16 @@ namespace platf::dxgi { return -1; } - auto draw = [&](auto &input, auto &y_or_yuv_viewports, auto &uv_viewport) { + auto draw = [&](auto &input, auto &y_or_yuv_viewports, auto &uv_viewports) { device_ctx->PSSetShaderResources(0, 1, &input); // Draw Y/YUV device_ctx->OMSetRenderTargets(1, &out_Y_or_YUV_rtv, nullptr); device_ctx->VSSetShader(convert_Y_or_YUV_vs.get(), nullptr, 0); device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_or_YUV_fp16_ps.get() : convert_Y_or_YUV_ps.get(), nullptr, 0); - auto viewport_count = (format == DXGI_FORMAT_R16_UINT) ? 3 : 1; + auto viewport_count = format == DXGI_FORMAT_R16_UINT ? 3 : + recombine_yuv444_into_yuv420 ? 
2 : + 1; assert(viewport_count <= y_or_yuv_viewports.size()); device_ctx->RSSetViewports(viewport_count, y_or_yuv_viewports.data()); device_ctx->Draw(3 * viewport_count, 0); // vertex shader will spread vertices across viewports @@ -431,20 +445,22 @@ namespace platf::dxgi { device_ctx->OMSetRenderTargets(1, &out_UV_rtv, nullptr); device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0); device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0); - device_ctx->RSSetViewports(1, &uv_viewport); - device_ctx->Draw(3, 0); + viewport_count = recombine_yuv444_into_yuv420 ? 2 : 1; + assert(viewport_count <= uv_viewports.size()); + device_ctx->RSSetViewports(viewport_count, uv_viewports.data()); + device_ctx->Draw(3 * viewport_count, 0); // vertex shader will spread vertices across viewports } }; // Clear render target view(s) once so that the aspect ratio mismatch "bars" appear black if (!rtvs_cleared) { auto black = create_black_texture_for_rtv_clear(); - if (black) draw(black, out_Y_or_YUV_viewports_for_clear, out_UV_viewport_for_clear); + if (black) draw(black, out_Y_or_YUV_viewports_for_clear, out_UV_viewports_for_clear); rtvs_cleared = true; } // Draw captured frame - draw(img_ctx.encoder_input_res, out_Y_or_YUV_viewports, out_UV_viewport); + draw(img_ctx.encoder_input_res, out_Y_or_YUV_viewports, out_UV_viewports); // Release encoder mutex to allow capture code to reuse this image img_ctx.encoder_mutex->ReleaseSync(0); @@ -462,7 +478,8 @@ namespace platf::dxgi { if (format == DXGI_FORMAT_AYUV || format == DXGI_FORMAT_R16_UINT || - format == DXGI_FORMAT_Y410) { + format == DXGI_FORMAT_Y410 || + recombine_yuv444_into_yuv420) { color_vectors = ::video::new_color_vectors_from_colorspace(colorspace); } @@ -503,20 +520,68 @@ namespace platf::dxgi { auto offsetX = (out_width - out_width_f) / 2; auto offsetY = (out_height - out_height_f) / 2; - out_Y_or_YUV_viewports[0] = { offsetX, offsetY, out_width_f, 
out_height_f, 0.0f, 1.0f }; // Y plane - out_Y_or_YUV_viewports[1] = out_Y_or_YUV_viewports[0]; // U plane - out_Y_or_YUV_viewports[1].TopLeftY += out_height; - out_Y_or_YUV_viewports[2] = out_Y_or_YUV_viewports[1]; // V plane - out_Y_or_YUV_viewports[2].TopLeftY += out_height; + if (recombine_yuv444_into_yuv420) { + auto recombined_dimensions = ::video::calculate_yuv444in420_dimensions(width, height); + + // Vertical stacking | Horizontal stacking + // | + // Y U V | Y U V + // +-------+ +---+ +---+ | +-------+-------+ +---+---+ +---+---+ + // | | |V0 | |V1 | | | | | |V0 |V2 | |V1 |V3 | + // | Y | +---+ +---+ | | Y | U | +---+---+ +---+---+ + // | | |V2 | |V3 | | | | | + // +-------+ +---+ +---+ | +-------+-------+ + // | | | + // | U | | + // | | | + // +-------+ | + + out_Y_or_YUV_viewports[0] = { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f }; // Y plane + out_Y_or_YUV_viewports[1] = out_Y_or_YUV_viewports[0]; // U plane + if (recombined_dimensions.vertical_stacking) + out_Y_or_YUV_viewports[1].TopLeftY += recombined_dimensions.stack_dimension; + else + out_Y_or_YUV_viewports[1].TopLeftX += recombined_dimensions.stack_dimension; + + out_Y_or_YUV_viewports_for_clear[0] = { 0, 0, (float) out_width, (float) out_height, 0.0f, 1.0f }; // Y plane + out_Y_or_YUV_viewports_for_clear[1] = out_Y_or_YUV_viewports_for_clear[0]; // U plane + if (recombined_dimensions.vertical_stacking) + out_Y_or_YUV_viewports_for_clear[1].TopLeftY += recombined_dimensions.stack_dimension; + else + out_Y_or_YUV_viewports_for_clear[1].TopLeftX += recombined_dimensions.stack_dimension; + + // V plane + out_UV_viewports[0] = { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f }; + out_UV_viewports[1] = out_UV_viewports[0]; + if (recombined_dimensions.vertical_stacking) + out_UV_viewports[1].TopLeftY += recombined_dimensions.stack_dimension / 2; + else + out_UV_viewports[1].TopLeftX += recombined_dimensions.stack_dimension / 2; + + // V plane + 
out_UV_viewports_for_clear[0] = { 0, 0, (float) out_width / 2, (float) out_height / 2, 0.0f, 1.0f }; + out_UV_viewports_for_clear[1] = out_UV_viewports_for_clear[0]; + if (recombined_dimensions.vertical_stacking) + out_UV_viewports_for_clear[1].TopLeftY += recombined_dimensions.stack_dimension / 2; + else + out_UV_viewports_for_clear[1].TopLeftX += recombined_dimensions.stack_dimension / 2; + } + else { + out_Y_or_YUV_viewports[0] = { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f }; // Y plane + out_Y_or_YUV_viewports[1] = out_Y_or_YUV_viewports[0]; // U plane + out_Y_or_YUV_viewports[1].TopLeftY += out_height; + out_Y_or_YUV_viewports[2] = out_Y_or_YUV_viewports[1]; // V plane + out_Y_or_YUV_viewports[2].TopLeftY += out_height; - out_Y_or_YUV_viewports_for_clear[0] = { 0, 0, (float) out_width, (float) out_height, 0.0f, 1.0f }; // Y plane - out_Y_or_YUV_viewports_for_clear[1] = out_Y_or_YUV_viewports_for_clear[0]; // U plane - out_Y_or_YUV_viewports_for_clear[1].TopLeftY += out_height; - out_Y_or_YUV_viewports_for_clear[2] = out_Y_or_YUV_viewports_for_clear[1]; // V plane - out_Y_or_YUV_viewports_for_clear[2].TopLeftY += out_height; + out_Y_or_YUV_viewports_for_clear[0] = { 0, 0, (float) out_width, (float) out_height, 0.0f, 1.0f }; // Y plane + out_Y_or_YUV_viewports_for_clear[1] = out_Y_or_YUV_viewports_for_clear[0]; // U plane + out_Y_or_YUV_viewports_for_clear[1].TopLeftY += out_height; + out_Y_or_YUV_viewports_for_clear[2] = out_Y_or_YUV_viewports_for_clear[1]; // V plane + out_Y_or_YUV_viewports_for_clear[2].TopLeftY += out_height; - out_UV_viewport = { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f }; - out_UV_viewport_for_clear = { 0, 0, (float) out_width / 2, (float) out_height / 2, 0.0f, 1.0f }; + out_UV_viewports[0] = { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f }; + out_UV_viewports_for_clear[0] = { 0, 0, (float) out_width / 2, (float) out_height / 2, 0.0f, 1.0f }; + } float 
subsample_offset_in[16 / sizeof(float)] { 1.0f / (float) out_width_f, 1.0f / (float) out_height_f }; // aligned to 16-byte subsample_offset = make_buffer(device.get(), subsample_offset_in); @@ -544,15 +609,27 @@ namespace platf::dxgi { switch (format) { case DXGI_FORMAT_NV12: - rtv_Y_or_YUV_format = DXGI_FORMAT_R8_UNORM; - rtv_UV_format = DXGI_FORMAT_R8G8_UNORM; - rtv_simple_clear = true; + if (recombine_yuv444_into_yuv420) { + rtv_Y_or_YUV_format = DXGI_FORMAT_R8_UINT; + rtv_UV_format = DXGI_FORMAT_R8G8_UINT; + } + else { + rtv_Y_or_YUV_format = DXGI_FORMAT_R8_UNORM; + rtv_UV_format = DXGI_FORMAT_R8G8_UNORM; + rtv_simple_clear = true; + } break; case DXGI_FORMAT_P010: - rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UNORM; - rtv_UV_format = DXGI_FORMAT_R16G16_UNORM; - rtv_simple_clear = true; + if (recombine_yuv444_into_yuv420) { + rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UINT; + rtv_UV_format = DXGI_FORMAT_R16G16_UINT; + } + else { + rtv_Y_or_YUV_format = DXGI_FORMAT_R16_UNORM; + rtv_UV_format = DXGI_FORMAT_R16G16_UNORM; + rtv_simple_clear = true; + } break; case DXGI_FORMAT_AYUV: @@ -611,7 +688,14 @@ namespace platf::dxgi { } int - init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) { + init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt, bool yuv444in420) { + if (yuv444in420 && pix_fmt != pix_fmt_e::nv12 && pix_fmt != pix_fmt_e::p010) { + BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported on this surface format"; + return -1; + } + + recombine_yuv444_into_yuv420 = yuv444in420; + switch (pix_fmt) { case pix_fmt_e::nv12: format = DXGI_FORMAT_NV12; @@ -690,27 +774,53 @@ namespace platf::dxgi { switch (format) { case DXGI_FORMAT_NV12: // Semi-planar 8-bit YUV 4:2:0 - create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, 
convert_Y_or_YUV_fp16_ps); - create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + if (recombine_yuv444_into_yuv420) { + create_vertex_shader_helper(convert_yuv444in420_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444in420_nv12_y_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv444in420_nv12_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + create_vertex_shader_helper(convert_yuv444in420_uv_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv444in420_nv12_uv_ps_hlsl, convert_UV_ps); + create_pixel_shader_helper(convert_yuv444in420_nv12_uv_ps_linear_hlsl, convert_UV_fp16_ps); + } + else { + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + } break; case DXGI_FORMAT_P010: // Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value - create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); - create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); - if (display->is_hdr()) { - create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); - 
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + if (recombine_yuv444_into_yuv420) { + create_vertex_shader_helper(convert_yuv444in420_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444in420_p010_y_ps_hlsl, convert_Y_or_YUV_ps); + create_vertex_shader_helper(convert_yuv444in420_uv_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv444in420_p010_uv_ps_hlsl, convert_UV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv444in420_p010_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + create_pixel_shader_helper(convert_yuv444in420_p010_uv_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv444in420_p010_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + create_pixel_shader_helper(convert_yuv444in420_p010_uv_ps_linear_hlsl, convert_UV_fp16_ps); + } } else { - create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + } } break; @@ -902,6 +1012,8 @@ namespace platf::dxgi { return 
resource_view; } + bool recombine_yuv444_into_yuv420 = false; + ::video::color_t *color_p; buf_t subsample_offset; @@ -931,7 +1043,7 @@ namespace platf::dxgi { ps_t convert_UV_fp16_ps; std::array out_Y_or_YUV_viewports, out_Y_or_YUV_viewports_for_clear; - D3D11_VIEWPORT out_UV_viewport, out_UV_viewport_for_clear; + std::array out_UV_viewports, out_UV_viewports_for_clear; DXGI_FORMAT format; @@ -944,9 +1056,10 @@ namespace platf::dxgi { class d3d_avcodec_encode_device_t: public avcodec_encode_device_t { public: int - init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) { - int result = base.init(display, adapter_p, pix_fmt); + init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt, bool yuv444in420) { + int result = base.init(display, adapter_p, pix_fmt, yuv444in420); data = base.device.get(); + recombine_yuv444_into_yuv420 = yuv444in420; return result; } @@ -1030,25 +1143,26 @@ namespace platf::dxgi { frame_texture = (ID3D11Texture2D *) frame->data[0]; } - return base.init_output(frame_texture, frame->width, frame->height); + return base.init_output(frame_texture, frame->width, recombine_yuv444_into_yuv420 ? 
frame->height / 2 : frame->height); } private: d3d_base_encode_device base; frame_t hwframe; + bool recombine_yuv444_into_yuv420 = false; }; class d3d_nvenc_encode_device_t: public nvenc_encode_device_t { public: bool - init_device(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) { + init_device(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt, bool yuv444in420) { buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt); if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) { BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']'; return false; } - if (base.init(display, adapter_p, pix_fmt)) return false; + if (base.init(display, adapter_p, pix_fmt, yuv444in420)) return false; if (pix_fmt == pix_fmt_e::yuv444p16) { nvenc_d3d = std::make_unique(base.device.get()); @@ -1893,18 +2007,18 @@ namespace platf::dxgi { } std::unique_ptr - display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt) { + display_vram_t::make_avcodec_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) { auto device = std::make_unique(); - if (device->init(shared_from_this(), adapter.get(), pix_fmt) != 0) { + if (device->init(shared_from_this(), adapter.get(), pix_fmt, yuv444in420) != 0) { return nullptr; } return device; } std::unique_ptr - display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt) { + display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) { auto device = std::make_unique(); - if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt)) { + if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt, yuv444in420)) { return nullptr; } return device; @@ -1937,6 +2051,18 @@ namespace platf::dxgi { compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear); compile_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer); compile_vertex_shader_helper(convert_yuv444_planar_vs); + 
compile_pixel_shader_helper(convert_yuv444in420_nv12_uv_ps); + compile_pixel_shader_helper(convert_yuv444in420_nv12_uv_ps_linear); + compile_pixel_shader_helper(convert_yuv444in420_nv12_y_ps); + compile_pixel_shader_helper(convert_yuv444in420_nv12_y_ps_linear); + compile_pixel_shader_helper(convert_yuv444in420_p010_uv_ps); + compile_pixel_shader_helper(convert_yuv444in420_p010_uv_ps_linear); + compile_pixel_shader_helper(convert_yuv444in420_p010_uv_ps_perceptual_quantizer); + compile_pixel_shader_helper(convert_yuv444in420_p010_y_ps); + compile_pixel_shader_helper(convert_yuv444in420_p010_y_ps_linear); + compile_pixel_shader_helper(convert_yuv444in420_p010_y_ps_perceptual_quantizer); + compile_vertex_shader_helper(convert_yuv444in420_uv_vs); + compile_vertex_shader_helper(convert_yuv444in420_y_vs); compile_pixel_shader_helper(cursor_ps); compile_pixel_shader_helper(cursor_ps_normalize_white); compile_vertex_shader_helper(cursor_vs); diff --git a/src/video.cpp b/src/video.cpp index 8c5829a2afb..f1c89be49c9 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -992,6 +992,7 @@ namespace video { int active_av1_mode; bool last_encoder_probe_supported_ref_frames_invalidation = false; std::array last_encoder_probe_supported_yuv444_for_codec = {}; + bool last_encoder_probe_supported_yuv444in420 = false; void reset_display(std::shared_ptr &disp, const platf::mem_type_e &type, const std::string &display_name, const config_t &config) { @@ -1458,11 +1459,18 @@ namespace video { } auto colorspace = encode_device->colorspace; - auto sw_fmt = (colorspace.bit_depth == 8 && config.chromaSamplingType == 0) ? platform_formats->avcodec_pix_fmt_8bit : - (colorspace.bit_depth == 8 && config.chromaSamplingType == 1) ? platform_formats->avcodec_pix_fmt_yuv444_8bit : - (colorspace.bit_depth == 10 && config.chromaSamplingType == 0) ? platform_formats->avcodec_pix_fmt_10bit : - (colorspace.bit_depth == 10 && config.chromaSamplingType == 1) ? 
platform_formats->avcodec_pix_fmt_yuv444_10bit : - AV_PIX_FMT_NONE; + + AVPixelFormat sw_fmt = AV_PIX_FMT_NONE; + if (colorspace.bit_depth == 8) { + sw_fmt = (config.chromaSamplingType == 1) ? + platform_formats->avcodec_pix_fmt_yuv444_8bit : + platform_formats->avcodec_pix_fmt_8bit; + } + else if (colorspace.bit_depth == 10) { + sw_fmt = (config.chromaSamplingType == 1) ? + platform_formats->avcodec_pix_fmt_yuv444_10bit : + platform_formats->avcodec_pix_fmt_10bit; + } // Allow up to 1 retry to apply the set of fallback options. // @@ -1472,8 +1480,16 @@ namespace video { avcodec_ctx_t ctx; for (int retries = 0; retries < 2; retries++) { ctx.reset(avcodec_alloc_context3(codec)); - ctx->width = config.width; - ctx->height = config.height; + if (config.chromaSamplingType == 2) { + // YUV 4:4:4 recombined into YUV 4:2:0 + auto recombined_dimensions = video::calculate_yuv444in420_dimensions(config.width, config.height); + ctx->width = recombined_dimensions.width; + ctx->height = recombined_dimensions.height; + } + else { + ctx->width = config.width; + ctx->height = config.height; + } ctx->time_base = AVRational { 1, config.framerate }; ctx->framerate = AVRational { config.framerate, 1 }; @@ -1718,7 +1734,7 @@ namespace video { frame->color_primaries = ctx->color_primaries; frame->color_trc = ctx->color_trc; frame->colorspace = ctx->colorspace; - frame->chroma_location = ctx->chroma_sample_location; + frame->chroma_location = (config.chromaSamplingType == 2) ? 
AVCHROMA_LOC_UNSPECIFIED : ctx->chroma_sample_location; // Attach HDR metadata to the AVFrame if (colorspace_is_hdr(colorspace)) { @@ -1759,7 +1775,7 @@ namespace video { if (!encode_device->data) { auto software_encode_device = std::make_unique(); - if (software_encode_device->init(width, height, frame.get(), sw_fmt, hardware)) { + if (software_encode_device->init(ctx->width, ctx->height, frame.get(), sw_fmt, hardware)) { return nullptr; } software_encode_device->colorspace = colorspace; @@ -1964,10 +1980,10 @@ namespace video { } if (dynamic_cast(encoder.platform_formats.get())) { - result = disp.make_avcodec_encode_device(pix_fmt); + result = disp.make_avcodec_encode_device(pix_fmt, config.chromaSamplingType == 2); } else if (dynamic_cast(encoder.platform_formats.get())) { - result = disp.make_nvenc_encode_device(pix_fmt); + result = disp.make_nvenc_encode_device(pix_fmt, config.chromaSamplingType == 2); } if (result) { @@ -2506,6 +2522,14 @@ namespace video { encoder.av1.capabilities.reset(); } + // Set YUV 4:4:4 in 4:2:0 recombination capabilities + { + const bool supported = disp->is_yuv444in420_supported(); + if (encoder.h264[encoder_t::PASSED]) encoder.h264[encoder_t::YUV444_IN_420] = supported; + if (encoder.hevc[encoder_t::PASSED]) encoder.hevc[encoder_t::YUV444_IN_420] = supported; + if (encoder.av1[encoder_t::PASSED]) encoder.av1[encoder_t::YUV444_IN_420] = supported; + } + // Test HDR and YUV444 support { // H.264 is special because encoders may support YUV 4:4:4 without supporting 10-bit color depth @@ -2725,6 +2749,8 @@ namespace video { encoder.hevc[encoder_t::YUV444]; last_encoder_probe_supported_yuv444_for_codec[2] = encoder.av1[encoder_t::PASSED] && encoder.av1[encoder_t::YUV444]; + last_encoder_probe_supported_yuv444in420 = encoder.h264[encoder_t::PASSED] && + encoder.h264[encoder_t::YUV444_IN_420]; BOOST_LOG(debug) << "------ h264 ------"sv; for (int x = 0; x < encoder_t::MAX_FLAGS; ++x) { @@ -2932,4 +2958,26 @@ namespace video { return 
platf::pix_fmt_e::unknown; } + yuv444in420_dimensions_t + calculate_yuv444in420_dimensions(uint32_t width, uint32_t height) { + if (width >= height) { + auto stack = (height + 7) / 8 * 8; // pad to 8x8 transform block + return { + .width = width, + .height = stack * 2, + .stack_dimension = stack, + .vertical_stacking = true, + }; + } + else { + auto stack = (width + 7) / 8 * 8; // pad to 8x8 transform block + return { + .width = stack * 2, + .height = height, + .stack_dimension = stack, + .vertical_stacking = false, + }; + } + } + } // namespace video diff --git a/src/video.h b/src/video.h index 6a50b2e3832..7018fc0c205 100644 --- a/src/video.h +++ b/src/video.h @@ -37,7 +37,7 @@ namespace video { HDR encoding activates when color depth is higher than 8-bit and the display which is being captured is operating in HDR mode */ int dynamicRange; - int chromaSamplingType; // 0 - 4:2:0, 1 - 4:4:4 + int chromaSamplingType; // 0 - 4:2:0, 1 - 4:4:4, 2 - 4:4:4 recombined into 4:2:0 }; platf::mem_type_e diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps.hlsl new file mode 100644 index 00000000000..0b36b432566 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_base.hlsl" + +#define RECOMBINED444_V_SAMPLING +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps_linear.hlsl new file mode 100644 index 00000000000..3ea077093ae --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_uv_ps_linear.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_linear_base.hlsl" + +#define RECOMBINED444_V_SAMPLING +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git 
a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps.hlsl new file mode 100644 index 00000000000..7db6ec3e3eb --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps.hlsl @@ -0,0 +1,3 @@ +#include "include/convert_base.hlsl" + +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps_linear.hlsl new file mode 100644 index 00000000000..77bcc4a86c2 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_nv12_y_ps_linear.hlsl @@ -0,0 +1,3 @@ +#include "include/convert_linear_base.hlsl" + +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps.hlsl new file mode 100644 index 00000000000..daf6a6bf883 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_base.hlsl" + +#define P010 +#define RECOMBINED444_V_SAMPLING +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_linear.hlsl new file mode 100644 index 00000000000..6d11690faa8 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_linear.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_linear_base.hlsl" + +#define P010 +#define RECOMBINED444_V_SAMPLING +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_perceptual_quantizer.hlsl 
b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_perceptual_quantizer.hlsl new file mode 100644 index 00000000000..5e26963462f --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_uv_ps_perceptual_quantizer.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_perceptual_quantizer_base.hlsl" + +#define P010 +#define RECOMBINED444_V_SAMPLING +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps.hlsl new file mode 100644 index 00000000000..8f219e30f09 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_base.hlsl" + +#define P010 +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_linear.hlsl new file mode 100644 index 00000000000..05d15a3f48d --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_linear.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_linear_base.hlsl" + +#define P010 +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_perceptual_quantizer.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_perceptual_quantizer.hlsl new file mode 100644 index 00000000000..6659ea02384 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_p010_y_ps_perceptual_quantizer.hlsl @@ -0,0 +1,4 @@ +#include "include/convert_perceptual_quantizer_base.hlsl" + +#define P010 +#include "include/convert_yuv444in420_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_uv_vs.hlsl 
b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_uv_vs.hlsl new file mode 100644 index 00000000000..82ee597cdee --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_uv_vs.hlsl @@ -0,0 +1,32 @@ +cbuffer subsample_offset_cbuffer : register(b0) { + float2 subsample_offset; +}; + +cbuffer rotate_texture_steps_cbuffer : register(b1) { + int rotate_texture_steps; +}; + +cbuffer color_matrix_cbuffer : register(b3) { + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; + float2 range_y; + float2 range_uv; +}; + +#define PLANAR_VIEWPORTS +#define RECOMBINED444_V_SAMPLING +#include "include/base_vs.hlsl" + +vertex_t main_vs(uint vertex_id : SV_VertexID) +{ + // vertex_id 0,1,2 : first recombined V viewport + // vertex_id 3,4,5 : second recombined V viewport + + vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, subsample_offset / 2, vertex_id / 3, rotate_texture_steps); + + output.viewport = vertex_id / 3; + output.color_vec = color_vec_v; + + return output; +} diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv444in420_y_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_y_vs.hlsl new file mode 100644 index 00000000000..85c885b53f5 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv444in420_y_vs.hlsl @@ -0,0 +1,33 @@ +cbuffer rotate_texture_steps_cbuffer : register(b1) { + int rotate_texture_steps; +}; + +cbuffer color_matrix_cbuffer : register(b3) { + float4 color_vec_y; + float4 color_vec_u; + float4 color_vec_v; + float2 range_y; + float2 range_uv; +}; + +#define PLANAR_VIEWPORTS +#include "include/base_vs.hlsl" + +vertex_t main_vs(uint vertex_id : SV_VertexID) +{ + // vertex_id 0,1,2 : Y viewport + // vertex_id 3,4,5 : recombined U viewport + + vertex_t output = generate_fullscreen_triangle_vertex(vertex_id % 3, rotate_texture_steps); + + output.viewport = vertex_id / 3; + + if (output.viewport == 0) { + output.color_vec = color_vec_y; + } 
+ else { + output.color_vec = color_vec_u; + } + + return output; +} diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl index c39e7c6f80b..e049bcce1d3 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl @@ -4,6 +4,8 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps) #elif defined(TOPLEFT_SUBSAMPLING) vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps) +#elif defined(RECOMBINED444_V_SAMPLING) +vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 sample_offset, bool second_stack, int rotate_texture_steps) #else vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_steps) #endif @@ -37,6 +39,16 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_ #elif defined (TOPLEFT_SUBSAMPLING) output.tex_right_left_top = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y - subsample_offset.y); output.tex_right_left_bottom = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y); +#elif defined(RECOMBINED444_V_SAMPLING) + // 0 1 + // x + // 2 3 + // if (!second_stack) { left = 0; right = 1 } + // else { left = 2; right = 3 } + output.tex_right_left_center = float3(tex_coord.x + sample_offset.x, + tex_coord.x - sample_offset.x, + second_stack ? 
tex_coord.y + sample_offset.y : + tex_coord.y - sample_offset.y); #else output.tex_coord = tex_coord; #endif diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl index cf755c5a0ff..2cc226a74b8 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl @@ -1,7 +1,7 @@ struct vertex_t { float4 viewpoint_pos : SV_Position; -#if defined(LEFT_SUBSAMPLING) +#if defined(LEFT_SUBSAMPLING) || defined(RECOMBINED444_V_SAMPLING) float3 tex_right_left_center : TEXCOORD; #elif defined (TOPLEFT_SUBSAMPLING) float3 tex_right_left_top : TEXCOORD; diff --git a/src_assets/windows/assets/shaders/directx/include/convert_yuv444in420_ps_base.hlsl b/src_assets/windows/assets/shaders/directx/include/convert_yuv444in420_ps_base.hlsl new file mode 100644 index 00000000000..cc8364a94c9 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/include/convert_yuv444in420_ps_base.hlsl @@ -0,0 +1,45 @@ +Texture2D image : register(t0); +SamplerState def_sampler : register(s0); + +#define PLANAR_VIEWPORTS +#include "include/base_vs_types.hlsl" + +#ifdef RECOMBINED444_V_SAMPLING +uint2 main_ps(vertex_t input) : SV_Target +#else +uint main_ps(vertex_t input) : SV_Target +#endif +{ + // Vertical stacking | Horizontal stacking + // | + // Y U V | Y U V + // +-------+ +---+ +---+ | +-------+-------+ +---+---+ +---+---+ + // | | |V0 | |V1 | | | | | |V0 |V2 | |V1 |V3 | + // | Y | +---+ +---+ | | Y | U | +---+---+ +---+---+ + // | | |V2 | |V3 | | | | | + // +-------+ +---+ +---+ | +-------+-------+ + // | | | + // | U | | + // | | | + // +-------+ | + +#ifdef RECOMBINED444_V_SAMPLING + float3 rgb_0_or_2 = CONVERT_FUNCTION(image.Sample(def_sampler, input.tex_right_left_center.yz).rgb); + float3 rgb_1_or_3 = CONVERT_FUNCTION(image.Sample(def_sampler, input.tex_right_left_center.xz).rgb); + uint2 vv = 
uint2(dot(input.color_vec.xyz, rgb_0_or_2) + input.color_vec.w, + dot(input.color_vec.xyz, rgb_1_or_3) + input.color_vec.w); +#ifdef P010 + return vv << 6; +#else + return vv; +#endif +#else + float3 rgb = CONVERT_FUNCTION(image.Sample(def_sampler, input.tex_coord, 0).rgb); + uint y_or_u = dot(input.color_vec.xyz, rgb) + input.color_vec.w; +#ifdef P010 + return y_or_u << 6; +#else + return y_or_u; +#endif +#endif +} diff --git a/third-party/moonlight-common-c b/third-party/moonlight-common-c index 8599b6042a4..ed07ba2cf55 160000 --- a/third-party/moonlight-common-c +++ b/third-party/moonlight-common-c @@ -1 +1 @@ -Subproject commit 8599b6042a4ba27749b0f94134dd614b4328a9bc +Subproject commit ed07ba2cf55132c0db7c0faae1202dd9b4b93a5d