Skip to content

Commit

Permalink
fixup! Initial implementation of yuv444in420 encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
ns6089 committed Jun 26, 2024
1 parent b8bfacf commit acbc5d4
Showing 1 changed file with 99 additions and 42 deletions.
141 changes: 99 additions & 42 deletions src/platform/windows/display_vram.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,34 @@ namespace platf::dxgi {
device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res);
device_ctx->Draw(9, 0); // vertex shader spreads 9 vertices across 3 viewports
}
else if (recombine_yuv444_into_yuv420) {
device_ctx->OMSetRenderTargets(1, &nv12_Y_rt, nullptr);
device_ctx->VSSetShader(scene_vs.get(), nullptr, 0);
device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_fp16_ps.get() : convert_Y_ps.get(), nullptr, 0);

if (!rt_cleared) {
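// TODO: clear the render targets here on the first convert(); ClearRenderTargetView() can't be used on the Y plane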
rt_cleared = true;
}

assert(outY_index_buffer);
assert(outYUVinY_views.size() == 3);
device_ctx->RSSetViewports(outYUVinY_views.size(), outYUVinY_views.data());
device_ctx->IASetInputLayout(outY_input_layout.get());
device_ctx->IASetIndexBuffer(outY_index_buffer.get(), DXGI_FORMAT_R32_UINT, 0);
auto y_buffer = outY_vertex_buffer.get();
UINT y_offset = 0;
device_ctx->IASetVertexBuffers(0, 1, &y_buffer, &outY_vertex_buffer_stride, &y_offset);
device_ctx->DrawIndexed(outY_index_buffer_size, 0, 0);

assert(outUV_index_buffer);
device_ctx->IASetInputLayout(outUV_input_layout.get());
device_ctx->IASetIndexBuffer(outUV_index_buffer.get(), DXGI_FORMAT_R32_UINT, 0);
auto uv_buffer = outUV_vertex_buffer.get();
UINT uv_offset = 0;
device_ctx->IASetVertexBuffers(0, 1, &uv_buffer, &outUV_vertex_buffer_stride, &uv_offset);
device_ctx->DrawIndexed(outUV_index_buffer_size, 0, 0);
}
else {
assert(rt_cleared);

Expand All @@ -469,35 +497,13 @@ namespace platf::dxgi {
device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_Y_fp16_ps.get() : convert_Y_ps.get(), nullptr, 0);
device_ctx->RSSetViewports(1, &outY_view);
device_ctx->PSSetShaderResources(0, 1, &img_ctx.encoder_input_res);
if (outY_index_buffer) {
assert(outYUVinY_views.size() == 3);
device_ctx->RSSetViewports(outYUVinY_views.size(), outYUVinY_views.data());
device_ctx->IASetInputLayout(outY_input_layout.get());
device_ctx->IASetIndexBuffer(outY_index_buffer.get(), DXGI_FORMAT_R32_UINT, 0);
auto buffer = outY_vertex_buffer.get();
UINT offset = 0;
device_ctx->IASetVertexBuffers(0, 1, &buffer, &outY_vertex_buffer_stride, &offset);
device_ctx->DrawIndexed(outY_index_buffer_size, 0, 0);
}
else {
device_ctx->Draw(3, 0);
}
device_ctx->Draw(3, 0);

device_ctx->OMSetRenderTargets(1, &nv12_UV_rt, nullptr);
device_ctx->VSSetShader(convert_UV_vs.get(), nullptr, 0);
device_ctx->PSSetShader(img.format == DXGI_FORMAT_R16G16B16A16_FLOAT ? convert_UV_fp16_ps.get() : convert_UV_ps.get(), nullptr, 0);
device_ctx->RSSetViewports(1, &outUV_view);
if (outUV_index_buffer) {
device_ctx->IASetInputLayout(outUV_input_layout.get());
device_ctx->IASetIndexBuffer(outUV_index_buffer.get(), DXGI_FORMAT_R32_UINT, 0);
auto buffer = outUV_vertex_buffer.get();
UINT offset = 0;
device_ctx->IASetVertexBuffers(0, 1, &buffer, &outUV_vertex_buffer_stride, &offset);
device_ctx->DrawIndexed(outUV_index_buffer_size, 0, 0);
}
else {
device_ctx->Draw(3, 0);
}
device_ctx->Draw(3, 0);
}

// Release encoder mutex to allow capture code to reuse this image
Expand All @@ -516,7 +522,8 @@ namespace platf::dxgi {

if (format == DXGI_FORMAT_AYUV ||
format == DXGI_FORMAT_R16_UINT ||
format == DXGI_FORMAT_Y410) {
format == DXGI_FORMAT_Y410 ||
recombine_yuv444_into_yuv420) {
color_vectors = ::video::new_color_vectors_from_colorspace(colorspace);
}

Expand Down Expand Up @@ -640,6 +647,28 @@ namespace platf::dxgi {
}
rt_cleared = false; // can't use ClearRenderTargetView(), will clear on first convert()
}
else if (recombine_yuv444_into_yuv420) {
D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc {
format == DXGI_FORMAT_P010 ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8_UINT,
D3D11_RTV_DIMENSION_TEXTURE2D
};

auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}

nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UINT : DXGI_FORMAT_R8G8_UINT;

status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt);
if (FAILED(status)) {
BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']';
return -1;
}

rt_cleared = false; // can't use ClearRenderTargetView() on Y plane, will clear on first convert()
}
else {
D3D11_RENDER_TARGET_VIEW_DESC nv12_rt_desc {
format == DXGI_FORMAT_P010 ? DXGI_FORMAT_R16_UNORM : DXGI_FORMAT_R8_UNORM,
Expand Down Expand Up @@ -672,7 +701,14 @@ namespace platf::dxgi {
}

int
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt, bool yuv444in420) {
if (yuv444in420 && format != DXGI_FORMAT_NV12 && format != DXGI_FORMAT_P010) {
BOOST_LOG(error) << "Recombined YUV 4:4:4 is not supported on this surface format";
return -1;
}

recombine_yuv444_into_yuv420 = yuv444in420;

D3D_FEATURE_LEVEL featureLevels[] {
D3D_FEATURE_LEVEL_11_1,
D3D_FEATURE_LEVEL_11_0,
Expand Down Expand Up @@ -748,13 +784,20 @@ namespace platf::dxgi {
return -1; \
}

create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, scene_vs);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
if (yuv444in420) {
create_vertex_shader_helper(convert_yuv444in420_planar_y_vs_hlsl, scene_vs);
create_vertex_shader_helper(convert_yuv444in420_packed_uv_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv444in420_planar_y_ps_hlsl, convert_Y_ps);
create_pixel_shader_helper(convert_yuv444in420_packed_uv_ps_hlsl, convert_UV_ps);
}
else {
create_vertex_shader_helper(convert_yuv420_planar_y_vs_hlsl, scene_vs);
create_vertex_shader_helper(convert_yuv420_packed_uv_type0_vs_hlsl, convert_UV_vs);
create_pixel_shader_helper(convert_yuv420_planar_y_ps_hlsl, convert_Y_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_hlsl, convert_UV_ps);
}

bool test_yuv444in420_flag = true;
if (test_yuv444in420_flag) {
if (yuv444in420) {
// Create input layouts
{
D3D11_INPUT_ELEMENT_DESC outY_input_layout_desc[] = {
Expand Down Expand Up @@ -909,14 +952,26 @@ namespace platf::dxgi {

// If the display is in HDR and we're streaming HDR, we'll be converting scRGB to SMPTE 2084 PQ.
if (format == DXGI_FORMAT_P010 && display->is_hdr()) {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
if (yuv444in420) {
create_pixel_shader_helper(convert_yuv444in420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_fp16_ps);
create_pixel_shader_helper(convert_yuv444in420_packed_uv_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_perceptual_quantizer_hlsl, convert_Y_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_perceptual_quantizer_hlsl, convert_UV_fp16_ps);
}
}
else {
// If the display is in Advanced Color mode, the desktop format will be scRGB FP16.
// scRGB uses linear gamma, so we must use our linear to sRGB conversion shaders.
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
if (yuv444in420) {
create_pixel_shader_helper(convert_yuv444in420_planar_y_ps_linear_hlsl, convert_Y_fp16_ps);
create_pixel_shader_helper(convert_yuv444in420_packed_uv_ps_linear_hlsl, convert_UV_fp16_ps);
}
else {
create_pixel_shader_helper(convert_yuv420_planar_y_ps_linear_hlsl, convert_Y_fp16_ps);
create_pixel_shader_helper(convert_yuv420_packed_uv_type0_ps_linear_hlsl, convert_UV_fp16_ps);
}
}

if (format == DXGI_FORMAT_R16_UINT) {
Expand Down Expand Up @@ -1100,6 +1155,8 @@ namespace platf::dxgi {
return resource_view;
}

bool recombine_yuv444_into_yuv420 = false;

::video::color_t *color_p;

buf_t subsample_offset;
Expand Down Expand Up @@ -1158,8 +1215,8 @@ namespace platf::dxgi {
class d3d_avcodec_encode_device_t: public avcodec_encode_device_t {
public:
int
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
int result = base.init(display, adapter_p, pix_fmt);
init(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt, bool yuv444in420) {
int result = base.init(display, adapter_p, pix_fmt, yuv444in420);
data = base.device.get();
return result;
}
Expand Down Expand Up @@ -1255,14 +1312,14 @@ namespace platf::dxgi {
class d3d_nvenc_encode_device_t: public nvenc_encode_device_t {
public:
bool
init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) {
init_device(std::shared_ptr<platf::display_t> display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt, bool yuv444in420) {
buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt);
if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) {
BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << from_pix_fmt(pix_fmt) << ']';
return false;
}

if (base.init(display, adapter_p, pix_fmt)) return false;
if (base.init(display, adapter_p, pix_fmt, yuv444in420)) return false;

if (pix_fmt == pix_fmt_e::yuv444p16) {
nvenc_d3d = std::make_unique<nvenc::nvenc_d3d11_on_cuda>(base.device.get());
Expand Down Expand Up @@ -2109,7 +2166,7 @@ namespace platf::dxgi {

auto device = std::make_unique<d3d_avcodec_encode_device_t>();

auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt);
auto ret = device->init(shared_from_this(), adapter.get(), pix_fmt, yuv444in420);

if (ret) {
return nullptr;
Expand All @@ -2121,7 +2178,7 @@ namespace platf::dxgi {
std::unique_ptr<nvenc_encode_device_t>
display_vram_t::make_nvenc_encode_device(pix_fmt_e pix_fmt, bool yuv444in420) {
auto device = std::make_unique<d3d_nvenc_encode_device_t>();
if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt)) {
if (!device->init_device(shared_from_this(), adapter.get(), pix_fmt, yuv444in420)) {
return nullptr;
}
return device;
Expand Down

0 comments on commit acbc5d4

Please sign in to comment.