diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index ed88e8d593d..7a46ba4b5b7 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -107,6 +107,10 @@ namespace platf::dxgi { blob_t convert_yuv420_packed_uv_type0_ps_linear_hlsl; blob_t convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl; blob_t convert_yuv420_packed_uv_type0_vs_hlsl; + blob_t convert_yuv420_packed_uv_type0s_ps_hlsl; + blob_t convert_yuv420_packed_uv_type0s_ps_linear_hlsl; + blob_t convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl; + blob_t convert_yuv420_packed_uv_type0s_vs_hlsl; blob_t convert_yuv420_planar_y_ps_hlsl; blob_t convert_yuv420_planar_y_ps_linear_hlsl; blob_t convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl; @@ -488,6 +492,110 @@ namespace platf::dxgi { frame_texture->AddRef(); output_texture.reset(frame_texture); + HRESULT status = S_OK; + +#define create_vertex_shader_helper(x, y) \ + if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ + BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \ + return -1; \ + } +#define create_pixel_shader_helper(x, y) \ + if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ + BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \ + return -1; \ + } + + const bool downscaling = display->width != width || display->height != height; + + switch (format) { + case DXGI_FORMAT_NV12: + // Semi-planar 8-bit YUV 4:2:0 + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + if (downscaling) { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps); + } + else { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + } + break; + + case DXGI_FORMAT_P010: + // Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value + create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + if (downscaling) { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_hlsl, convert_UV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear_hlsl, convert_UV_fp16_ps); + } + } + else { + create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); + } + } + break; + + case DXGI_FORMAT_R16_UINT: + // Planar 16-bit YUV 4:4:4, 10 most significant bits store the value + create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + break; + + case DXGI_FORMAT_AYUV: + // Packed 8-bit YUV 4:4:4 + create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps); + create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + break; + + case DXGI_FORMAT_Y410: + // Packed 10-bit YUV 4:4:4 + create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps); + if (display->is_hdr()) { + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); + } + else { + create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); + } + break; + + default: + BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format"; + return -1; + } + +#undef create_vertex_shader_helper +#undef create_pixel_shader_helper + auto out_width = width; auto out_height = height; @@ -676,83 +784,6 @@ namespace platf::dxgi { BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance."; } -#define create_vertex_shader_helper(x, y) \ - if (FAILED(status = device->CreateVertexShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ - BOOST_LOG(error) << "Failed to create vertex shader " << #x << ": " << util::log_hex(status); \ - return -1; \ - } -#define create_pixel_shader_helper(x, y) \ - if (FAILED(status = device->CreatePixelShader(x->GetBufferPointer(), x->GetBufferSize(), nullptr, &y))) { \ - BOOST_LOG(error) << "Failed to create pixel shader " << #x << ": " << util::log_hex(status); \ - return -1; \ - } - - switch (format) { - case DXGI_FORMAT_NV12: - // Semi-planar 8-bit YUV 4:2:0 - create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); - break; - - case DXGI_FORMAT_P010: - // Semi-planar 16-bit YUV 4:2:0, 10 most significant bits store the value - create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_or_YUV_ps); - create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps); - if (display->is_hdr()) { - create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps); - } - else { - create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps); - } - break; - - case DXGI_FORMAT_R16_UINT: - // Planar 16-bit YUV 4:4:4, 10 most significant bits store the value - create_vertex_shader_helper(convert_yuv444_planar_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv444_planar_ps_hlsl, convert_Y_or_YUV_ps); - if (display->is_hdr()) { - create_pixel_shader_helper(convert_yuv444_planar_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); - } - else { - create_pixel_shader_helper(convert_yuv444_planar_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - } - break; - - case DXGI_FORMAT_AYUV: - // Packed 8-bit YUV 4:4:4 - create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_hlsl, convert_Y_or_YUV_ps); - create_pixel_shader_helper(convert_yuv444_packed_ayuv_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - break; - - case DXGI_FORMAT_Y410: - // Packed 10-bit YUV 4:4:4 - create_vertex_shader_helper(convert_yuv444_packed_vs_hlsl, convert_Y_or_YUV_vs); - create_pixel_shader_helper(convert_yuv444_packed_y410_ps_hlsl, convert_Y_or_YUV_ps); - if (display->is_hdr()) { - create_pixel_shader_helper(convert_yuv444_packed_y410_ps_perceptual_quantizer_hlsl, convert_Y_or_YUV_fp16_ps); - } - else { - create_pixel_shader_helper(convert_yuv444_packed_y410_ps_linear_hlsl, convert_Y_or_YUV_fp16_ps); - } - break; - - default: - BOOST_LOG(error) << "Unable to create shaders because of the unrecognized surface format"; - return -1; - } - -#undef create_vertex_shader_helper -#undef create_pixel_shader_helper - auto default_color_vectors = ::video::color_vectors_from_colorspace(::video::colorspace_e::rec601, false); if (!default_color_vectors) { BOOST_LOG(error) << "Missing color vectors for Rec. 601"sv; @@ -1916,6 +1947,10 @@ namespace platf::dxgi { compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear); compile_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer); compile_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs); + compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps); + compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_linear); + compile_pixel_shader_helper(convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer); + compile_vertex_shader_helper(convert_yuv420_packed_uv_type0s_vs); compile_pixel_shader_helper(convert_yuv420_planar_y_ps); compile_pixel_shader_helper(convert_yuv420_planar_y_ps_linear); compile_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer); diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps.hlsl new file mode 100644 index 00000000000..73fd423c415 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_base.hlsl" + +#define LEFT_SUBSAMPLING_SCALE + +#include "include/convert_yuv420_packed_uv_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_linear.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_linear.hlsl new file mode 100644 index 00000000000..c451dc19d4a --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_linear.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_linear_base.hlsl" + +#define LEFT_SUBSAMPLING_SCALE + +#include "include/convert_yuv420_packed_uv_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl new file mode 100644 index 00000000000..9156257f1f1 --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_ps_perceptual_quantizer.hlsl @@ -0,0 +1,5 @@ +#include "include/convert_perceptual_quantizer_base.hlsl" + +#define LEFT_SUBSAMPLING_SCALE + +#include "include/convert_yuv420_packed_uv_ps_base.hlsl" diff --git a/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl new file mode 100644 index 00000000000..c6df6b49bad --- /dev/null +++ b/src_assets/windows/assets/shaders/directx/convert_yuv420_packed_uv_type0s_vs.hlsl @@ -0,0 +1,15 @@ +cbuffer subsample_offset_cbuffer : register(b0) { + float2 subsample_offset; +}; + +cbuffer rotate_texture_steps_cbuffer : register(b1) { + int rotate_texture_steps; +}; + +#define LEFT_SUBSAMPLING_SCALE +#include "include/base_vs.hlsl" + +vertex_t main_vs(uint vertex_id : SV_VertexID) +{ + return generate_fullscreen_triangle_vertex(vertex_id, subsample_offset / 2, rotate_texture_steps); +} diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl index c39e7c6f80b..287d252be83 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs.hlsl @@ -2,6 +2,8 @@ #if defined(LEFT_SUBSAMPLING) vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float subsample_offset, int rotate_texture_steps) +#elif defined(LEFT_SUBSAMPLING_SCALE) +vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 halfsample_offset, int rotate_texture_steps) #elif defined(TOPLEFT_SUBSAMPLING) vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, float2 subsample_offset, int rotate_texture_steps) #else @@ -34,7 +36,15 @@ vertex_t generate_fullscreen_triangle_vertex(uint vertex_id, int rotate_texture_ #if defined(LEFT_SUBSAMPLING) output.tex_right_left_center = float3(tex_coord.x, tex_coord.x - subsample_offset, tex_coord.y); -#elif defined (TOPLEFT_SUBSAMPLING) +#elif defined(LEFT_SUBSAMPLING_SCALE) + float3 right_center_left = float3(tex_coord.x + halfsample_offset.x, + tex_coord.x - halfsample_offset.x, + tex_coord.x - 3 * halfsample_offset.x); + float2 top_bottom = float2(tex_coord.y - halfsample_offset.y, + tex_coord.y + halfsample_offset.y); + output.tex_right_center_left_top = float4(right_center_left, top_bottom.x); + output.tex_right_center_left_bottom = float4(right_center_left, top_bottom.y); +#elif defined(TOPLEFT_SUBSAMPLING) output.tex_right_left_top = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y - subsample_offset.y); output.tex_right_left_bottom = float3(tex_coord.x, tex_coord.x - subsample_offset.x, tex_coord.y); #else diff --git a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl index cf755c5a0ff..fabc52bb777 100644 --- a/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/base_vs_types.hlsl @@ -3,9 +3,12 @@ struct vertex_t float4 viewpoint_pos : SV_Position; #if defined(LEFT_SUBSAMPLING) float3 tex_right_left_center : TEXCOORD; -#elif defined (TOPLEFT_SUBSAMPLING) - float3 tex_right_left_top : TEXCOORD; - float3 tex_right_left_bottom : TEXCOORD; +#elif defined(LEFT_SUBSAMPLING_SCALE) + float4 tex_right_center_left_top : TEXCOORD0; + float4 tex_right_center_left_bottom : TEXCOORD1; +#elif defined(TOPLEFT_SUBSAMPLING) + float3 tex_right_left_top : TEXCOORD0; + float3 tex_right_left_bottom : TEXCOORD1; #else float2 tex_coord : TEXCOORD; #endif diff --git a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl index c21dccd7ed2..ad69c2ac0ef 100644 --- a/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl +++ b/src_assets/windows/assets/shaders/directx/include/convert_yuv420_packed_uv_ps_base.hlsl @@ -17,6 +17,14 @@ float2 main_ps(vertex_t input) : SV_Target float3 rgb_left = image.Sample(def_sampler, input.tex_right_left_center.xz).rgb; float3 rgb_right = image.Sample(def_sampler, input.tex_right_left_center.yz).rgb; float3 rgb = CONVERT_FUNCTION((rgb_left + rgb_right) * 0.5); +#elif defined(LEFT_SUBSAMPLING_SCALE) + float3 rgb = image.Sample(def_sampler, input.tex_right_center_left_top.xw).rgb; // top-right + rgb += image.Sample(def_sampler, input.tex_right_center_left_top.yw).rgb; // top-center + rgb += image.Sample(def_sampler, input.tex_right_center_left_top.zw).rgb; // top-left + rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.xw).rgb; // bottom-right + rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.yw).rgb; // bottom-center + rgb += image.Sample(def_sampler, input.tex_right_center_left_bottom.zw).rgb; // bottom-left + rgb = CONVERT_FUNCTION(rgb * (1./6)); #elif defined(TOPLEFT_SUBSAMPLING) float3 rgb_top_left = image.Sample(def_sampler, input.tex_right_left_top.xz).rgb; float3 rgb_top_right = image.Sample(def_sampler, input.tex_right_left_top.yz).rgb;