diff --git a/CMakeLists.txt b/CMakeLists.txt index d9574c8ef8d..17659f63c66 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -179,6 +179,12 @@ if(WIN32) third-party/ViGEmClient/include/ViGEm/Util.h third-party/ViGEmClient/include/ViGEm/km/BusShared.h) + file(GLOB NVENC_SOURCES CONFIGURE_DEPENDS + "third-party/nvenc/*.h" + "src/nvenc/*.cpp" + "src/nvenc/*.h") + list(APPEND PLATFORM_TARGET_FILES ${NVENC_SOURCES}) + set(OPENSSL_LIBRARIES libssl.a libcrypto.a) diff --git a/src/main.h b/src/main.h index d5a6eeb5a07..65ed1db6e7f 100644 --- a/src/main.h +++ b/src/main.h @@ -62,6 +62,7 @@ namespace mail { // Local mail MAIL(touch_port); MAIL(idr); + MAIL(invalidate_ref_frames); MAIL(gamepad_feedback); MAIL(hdr); #undef MAIL diff --git a/src/nvenc/nvenc_base.cpp b/src/nvenc/nvenc_base.cpp new file mode 100644 index 00000000000..869d54fa770 --- /dev/null +++ b/src/nvenc/nvenc_base.cpp @@ -0,0 +1,322 @@ +#include "nvenc_base.h" + +namespace { + GUID + quality_preset_guid_from_number(unsigned number) { + if (number > 7) number = 7; + + switch (number) { + case 1: + default: + return NV_ENC_PRESET_P1_GUID; + + case 2: + return NV_ENC_PRESET_P2_GUID; + + case 3: + return NV_ENC_PRESET_P3_GUID; + + case 4: + return NV_ENC_PRESET_P4_GUID; + + case 5: + return NV_ENC_PRESET_P5_GUID; + + case 6: + return NV_ENC_PRESET_P6_GUID; + + case 7: + return NV_ENC_PRESET_P7_GUID; + } + }; + + auto + quality_preset_string_from_guid(const GUID &guid) { + if (guid == NV_ENC_PRESET_P1_GUID) { + return "P1"; + } + if (guid == NV_ENC_PRESET_P2_GUID) { + return "P2"; + } + if (guid == NV_ENC_PRESET_P3_GUID) { + return "P3"; + } + if (guid == NV_ENC_PRESET_P4_GUID) { + return "P4"; + } + if (guid == NV_ENC_PRESET_P5_GUID) { + return "P5"; + } + if (guid == NV_ENC_PRESET_P6_GUID) { + return "P6"; + } + if (guid == NV_ENC_PRESET_P7_GUID) { + return "P7"; + } + return "Unknown"; + } + +} // namespace + +namespace nvenc { + + nvenc_base::nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device, uint32_t max_width, uint32_t max_height, NV_ENC_BUFFER_FORMAT buffer_format): + device_type(device_type), + device(device), + max_width(max_width), + max_height(max_height), + buffer_format(buffer_format) { + } + + nvenc_base::~nvenc_base() { + // Use destroy_base_resources() instead + } + + bool + nvenc_base::create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace) { + if (encoder) return false; + if (!nvenc && !init_library()) return false; + + NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = { NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER }; + session_params.device = device; + session_params.deviceType = device_type; + session_params.apiVersion = NVENCAPI_VERSION; + if (nvenc->nvEncOpenEncodeSessionEx(&session_params, &encoder) != NV_ENC_SUCCESS) { + BOOST_LOG(error) << "NvEncOpenEncodeSessionEx failed"; + return false; + } + + width = client_config.width; + height = client_config.height; + + NV_ENC_INITIALIZE_PARAMS init_params = { NV_ENC_INITIALIZE_PARAMS_VER }; + init_params.maxEncodeWidth = max_width; + init_params.maxEncodeHeight = max_height; + init_params.presetGUID = quality_preset_guid_from_number(config.quality_preset); + init_params.tuningInfo = NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY; + init_params.enablePTD = 1; + + init_params.encodeWidth = width; + init_params.darWidth = width; + init_params.encodeHeight = height; + init_params.darHeight = height; + init_params.frameRateNum = client_config.framerate; + init_params.frameRateDen = 1; + + switch 
(client_config.videoFormat) { + case 0: + // H.264 + init_params.encodeGUID = NV_ENC_CODEC_H264_GUID; + break; + + case 1: + // HEVC + init_params.encodeGUID = NV_ENC_CODEC_HEVC_GUID; + break; + } + + NV_ENC_PRESET_CONFIG preset_config = { NV_ENC_PRESET_CONFIG_VER, { NV_ENC_CONFIG_VER } }; + if (nvenc->nvEncGetEncodePresetConfigEx(encoder, init_params.encodeGUID, init_params.presetGUID, init_params.tuningInfo, &preset_config) != NV_ENC_SUCCESS) { + BOOST_LOG(error) << "NvEncGetEncodePresetConfigEx failed: " << nvenc->nvEncGetLastErrorString(encoder); + return false; + } + + NV_ENC_CONFIG enc_config = preset_config.presetCfg; + enc_config.profileGUID = NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID; // TODO: select specific profile? + enc_config.gopLength = NVENC_INFINITE_GOPLENGTH; + enc_config.frameIntervalP = 1; + enc_config.rcParams.enableAQ = 0; + enc_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR; + enc_config.rcParams.zeroReorderDelay = 1; + enc_config.rcParams.enableLookahead = 0; + enc_config.rcParams.enableNonRefP = 1; + enc_config.rcParams.lowDelayKeyFrameScale = config.keyframe_vbv_multiplier > 1 ? config.keyframe_vbv_multiplier : 1; + enc_config.rcParams.multiPass = config.multipass == multipass_e::two_pass_full_res ? NV_ENC_TWO_PASS_FULL_RESOLUTION : + config.multipass == multipass_e::two_pass_quarter_res ? NV_ENC_TWO_PASS_QUARTER_RESOLUTION : + NV_ENC_MULTI_PASS_DISABLED; + enc_config.rcParams.averageBitRate = client_config.bitrate * 1000; + enc_config.rcParams.vbvBufferSize = client_config.bitrate * 1000 / client_config.framerate; + + auto set_common_format_config = [&config, &client_config, this](auto &format_config) { + format_config.repeatSPSPPS = 1; + format_config.idrPeriod = NVENC_INFINITE_GOPLENGTH; + format_config.sliceMode = 3; + format_config.sliceModeData = client_config.slicesPerFrame; + if (buffer_format == NV_ENC_BUFFER_FORMAT_YUV444 || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) { + format_config.chromaFormatIDC = 3; + } + format_config.enableFillerDataInsertion = config.filler_data_insertion; + }; + + auto fill_vui = [&colorspace](auto &vui_config) { + vui_config.videoSignalTypePresentFlag = 1; + vui_config.videoFormat = NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED; + vui_config.videoFullRangeFlag = colorspace.full_range; + vui_config.colourDescriptionPresentFlag = 1; + vui_config.colourPrimaries = colorspace.primaries; + vui_config.transferCharacteristics = colorspace.tranfer_function; + vui_config.colourMatrix = colorspace.matrix; + }; + + switch (client_config.videoFormat) { + case 0: { + // H.264 + auto &format_config = enc_config.encodeCodecConfig.h264Config; + set_common_format_config(format_config); + if (client_config.numRefFrames > 0) { + format_config.maxNumRefFrames = client_config.numRefFrames; + } + else { + format_config.maxNumRefFrames = 5; + } + format_config.numRefL0 = NV_ENC_NUM_REF_FRAMES_1; + fill_vui(format_config.h264VUIParameters); + break; + } + + case 1: { + // HEVC + auto &format_config = enc_config.encodeCodecConfig.hevcConfig; + set_common_format_config(format_config); + if (buffer_format == NV_ENC_BUFFER_FORMAT_YUV420_10BIT || buffer_format == NV_ENC_BUFFER_FORMAT_YUV444_10BIT) { + format_config.pixelBitDepthMinus8 = 2; + } + if (client_config.numRefFrames > 0) { + format_config.maxNumRefFramesInDPB = client_config.numRefFrames; + } + else { + format_config.maxNumRefFramesInDPB = 5; + } + format_config.numRefL0 = NV_ENC_NUM_REF_FRAMES_1; + fill_vui(format_config.hevcVUIParameters); + break; + } + } + + init_params.encodeConfig = 
&enc_config;
+
+    if (nvenc->nvEncInitializeEncoder(encoder, &init_params) != NV_ENC_SUCCESS) {
+      BOOST_LOG(error) << "NvEncInitializeEncoder failed: " << nvenc->nvEncGetLastErrorString(encoder);
+      return false;
+    }
+
+    NV_ENC_CREATE_BITSTREAM_BUFFER create_bitstream_buffer = { NV_ENC_CREATE_BITSTREAM_BUFFER_VER };
+    if (nvenc->nvEncCreateBitstreamBuffer(encoder, &create_bitstream_buffer) != NV_ENC_SUCCESS) {
+      BOOST_LOG(error) << "NvEncCreateBitstreamBuffer failed: " << nvenc->nvEncGetLastErrorString(encoder);
+      return false;
+    }
+    output_bitstream = create_bitstream_buffer.bitstreamBuffer;
+
+    if (!create_input_buffer()) {
+      return false;
+    }
+
+    BOOST_LOG(info) << "Created NvENC encoder at " << quality_preset_string_from_guid(init_params.presetGUID);
+
+    return true;
+  }
+
+  nvenc_encoded_frame
+  nvenc_base::encode_frame(uint64_t frame_index, bool force_idr) {
+    if (!encoder || !output_bitstream) {
+      return {};
+    }
+
+    auto input_buffer = get_input_buffer();
+
+    if (!input_buffer) {
+      return {};
+    }
+
+    NV_ENC_PIC_PARAMS pic_params = { NV_ENC_PIC_PARAMS_VER };
+    pic_params.inputWidth = width;
+    pic_params.inputHeight = height;
+    pic_params.encodePicFlags = force_idr ? NV_ENC_PIC_FLAG_FORCEIDR : 0;
+    pic_params.inputTimeStamp = frame_index;
+    pic_params.inputBuffer = input_buffer;
+    pic_params.outputBitstream = output_bitstream;
+    pic_params.bufferFmt = buffer_format;
+    pic_params.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
+
+    if (nvenc->nvEncEncodePicture(encoder, &pic_params) != NV_ENC_SUCCESS) {
+      BOOST_LOG(error) << "NvEncEncodePicture failed: " << nvenc->nvEncGetLastErrorString(encoder);
+      return {};
+    }
+
+    NV_ENC_LOCK_BITSTREAM lock_bitstream = { NV_ENC_LOCK_BITSTREAM_VER };
+    lock_bitstream.outputBitstream = output_bitstream;
+    lock_bitstream.doNotWait = false;
+
+    if (nvenc->nvEncLockBitstream(encoder, &lock_bitstream) != NV_ENC_SUCCESS) {
+      BOOST_LOG(error) << "NvEncLockBitstream failed: " << nvenc->nvEncGetLastErrorString(encoder);
+      return {};
+    }
+
+    auto data_pointer = (uint8_t *) lock_bitstream.bitstreamBufferPtr;
+    nvenc_encoded_frame result {
+      { data_pointer, data_pointer + lock_bitstream.bitstreamSizeInBytes },
+      lock_bitstream.outputTimeStamp,
+      lock_bitstream.pictureType == NV_ENC_PIC_TYPE_IDR,
+      ref_frame_invalidation_requested,
+    };
+
+    if (ref_frame_invalidation_requested) {
+      // Invalidation request has been fulfilled, and the video network packet will be marked as such
+      ref_frame_invalidation_requested = false;
+    }
+
+    last_encoded_frame_index = frame_index;
+
+    if (result.idr) {
+      BOOST_LOG(info) << "idr " << result.frame_index;
+    }
+
+    if (nvenc->nvEncUnlockBitstream(encoder, lock_bitstream.outputBitstream) != NV_ENC_SUCCESS) {
+      BOOST_LOG(error) << "NvEncUnlockBitstream failed: " << nvenc->nvEncGetLastErrorString(encoder);
+      return {};
+    }
+
+    return result;
+  }
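Reference frame invalidation deserves a note: rather than tracking exactly which frames each request covers, invalidate_ref_frames() below invalidates everything from `first_frame` through the most recently encoded frame and remembers that range, so overlapping loss reports arriving in quick succession can be answered from `last_ref_frame_invalidation_range` without touching the encoder again. A minimal sketch of the calling pattern this enables (the `stream`, `streaming`, `send_frame`, and `poll_loss_report` names are illustrative, not part of the patch):

```cpp
// Hedged sketch: one encode loop driving a concrete nvenc_base subclass.
uint64_t frame_index = 1;
bool force_idr = true;  // the first frame must be an IDR

while (streaming) {
  nvenc::nvenc_encoded_frame frame = stream.encode_frame(frame_index++, force_idr);
  force_idr = false;
  if (frame.data.empty()) break;  // encoder error, already logged
  send_frame(frame);  // frame.idr / frame.after_ref_frame_invalidation select the header type

  // Client reported losing frames [first, last]
  if (auto lost = poll_loss_report()) {
    if (!stream.invalidate_ref_frames(lost->first, lost->second))
      force_idr = true;  // invalidation refused or failed: recover with a full IDR
  }
}
```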
+
+  bool
+  nvenc_base::invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame) {
+    if (!encoder) return false;
+
+    if (last_frame < first_frame || last_encoded_frame_index < first_frame || last_encoded_frame_index > first_frame + 100) {
+      BOOST_LOG(error) << "invalidate_ref_frames " << first_frame << "-" << last_frame << " invalid range (current frame " << last_encoded_frame_index << ")";
+      return false;
+    }
+
+    if (first_frame >= last_ref_frame_invalidation_range.first && last_frame <= last_ref_frame_invalidation_range.second) {
+      BOOST_LOG(info) << "invalidate_ref_frames " << first_frame << "-" << last_frame << " predicted";
+      return true;
+    }
+
+    BOOST_LOG(info) << "invalidate_ref_frames " << first_frame << "-" << last_frame << " predicting " << first_frame << "-" << last_encoded_frame_index;
+
+    ref_frame_invalidation_requested = true;
+    last_ref_frame_invalidation_range = { first_frame, last_encoded_frame_index };
+
+    bool result = true;
+    for (auto i = first_frame; i <= last_encoded_frame_index; i++) {
+      if (nvenc->nvEncInvalidateRefFrames(encoder, i) != NV_ENC_SUCCESS) {
+        BOOST_LOG(error) << "NvEncInvalidateRefFrames " << i << " failed: " << nvenc->nvEncGetLastErrorString(encoder);
+        result = false;
+      }
+    }
+
+    return result;
+  }
+
+  void
+  nvenc_base::destroy_base_resources() {
+    if (output_bitstream) {
+      nvenc->nvEncDestroyBitstreamBuffer(encoder, output_bitstream);
+    }
+    if (encoder) {
+      nvenc->nvEncDestroyEncoder(encoder);
+    }
+  }
+
+}  // namespace nvenc
diff --git a/src/nvenc/nvenc_base.h b/src/nvenc/nvenc_base.h
new file mode 100644
index 00000000000..4e140f327d9
--- /dev/null
+++ b/src/nvenc/nvenc_base.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include "nvenc_colorspace.h"
+#include "nvenc_config.h"
+#include "nvenc_encoded_frame.h"
+
+#include "src/video.h"
+#include "third-party/nvenc/nvEncodeAPI.h"
+
+namespace nvenc {
+
+  class nvenc_base {
+  public:
+    nvenc_base(NV_ENC_DEVICE_TYPE device_type, void *device, uint32_t max_width, uint32_t max_height, NV_ENC_BUFFER_FORMAT buffer_format);
+    virtual ~nvenc_base();
+
+    nvenc_base(const nvenc_base &) = delete;
+    nvenc_base &
+    operator=(const nvenc_base &) = delete;
+
+    bool
+    create_encoder(const nvenc_config &config, const video::config_t &client_config, const nvenc_colorspace_t &colorspace);
+
+    nvenc_encoded_frame
+    encode_frame(uint64_t frame_index, bool force_idr);
+
+    bool
+    invalidate_ref_frames(uint64_t first_frame, uint64_t last_frame);
+
+  protected:
+    void
+    destroy_base_resources();
+
+    virtual bool
+    init_library() = 0;
+
+    virtual bool
+    create_input_buffer() = 0;
+
+    virtual NV_ENC_REGISTERED_PTR
+    get_input_buffer() = 0;
+
+    const NV_ENC_DEVICE_TYPE device_type;
+    void *const device;
+    const uint32_t max_width;
+    const uint32_t max_height;
+    const NV_ENC_BUFFER_FORMAT buffer_format;
+
+    std::unique_ptr<NV_ENCODE_API_FUNCTION_LIST> nvenc;
+    void *encoder = nullptr;
+    uint32_t width = 0;
+    uint32_t height = 0;
+
+  private:
+    NV_ENC_OUTPUT_PTR output_bitstream = nullptr;
+
+    uint64_t last_encoded_frame_index = 0;
+
+    bool ref_frame_invalidation_requested = false;
+    std::pair<uint64_t, uint64_t> last_ref_frame_invalidation_range;
+  };
+
+}  // namespace nvenc
diff --git a/src/nvenc/nvenc_colorspace.h b/src/nvenc/nvenc_colorspace.h
new file mode 100644
index 00000000000..cb632f129c6
--- /dev/null
+++ b/src/nvenc/nvenc_colorspace.h
@@ -0,0 +1,12 @@
+#pragma once
+
+#include "third-party/nvenc/nvEncodeAPI.h"
+
+namespace nvenc {
+  struct nvenc_colorspace_t {
+    NV_ENC_VUI_COLOR_PRIMARIES primaries;
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC tranfer_function;
+    NV_ENC_VUI_MATRIX_COEFFS matrix;
+    bool full_range;
+  };
+}  // namespace nvenc
diff --git a/src/nvenc/nvenc_config.h b/src/nvenc/nvenc_config.h
new file mode 100644
index 00000000000..b55cb5d7df0
--- /dev/null
+++ b/src/nvenc/nvenc_config.h
@@ -0,0 +1,18 @@
+#pragma once
+
+namespace nvenc {
+
+  enum class multipass_e {
+    one_pass,
+    two_pass_quarter_res,
+    two_pass_full_res,
+  };
+
+  struct nvenc_config {
+    unsigned quality_preset = 1;  // Quality preset from 1 to 7
+    unsigned keyframe_vbv_multiplier = 2;  // Allows I-frames to break normal VBV constraints
+    multipass_e multipass = multipass_e::one_pass;
+    bool filler_data_insertion = false;
+  };
+
+}  // namespace nvenc
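The defaults in nvenc_config favor latency; a caller that can afford more encode time only needs to override individual fields. For instance (hypothetical tuning values, not part of the patch):

```cpp
nvenc::nvenc_config config;
config.quality_preset = 4;  // P4 trades encode time for quality over the P1 default
config.multipass = nvenc::multipass_e::two_pass_quarter_res;  // rate-control pre-pass at quarter resolution
config.keyframe_vbv_multiplier = 2;  // IDR frames may use up to 2x the per-frame VBV budget
config.filler_data_insertion = false;  // don't pad the stream up to the CBR bitrate
```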
diff --git a/src/nvenc/nvenc_d3d11.cpp b/src/nvenc/nvenc_d3d11.cpp
new file mode 100644
index 00000000000..c1d6b4a10ca
--- /dev/null
+++ b/src/nvenc/nvenc_d3d11.cpp
@@ -0,0 +1,105 @@
+#ifdef _WIN32
+  #include "nvenc_d3d11.h"
+
+  #include "nvenc_utils.h"
+
+namespace nvenc {
+
+  nvenc_d3d11::nvenc_d3d11(ID3D11Device *d3d_device, uint32_t max_width, uint32_t max_height, NV_ENC_BUFFER_FORMAT buffer_format):
+      nvenc_base(NV_ENC_DEVICE_TYPE_DIRECTX, d3d_device, max_width, max_height, buffer_format),
+      d3d_device(d3d_device) {
+  }
+
+  nvenc_d3d11::~nvenc_d3d11() {
+    destroy_base_resources();
+
+    if (d3d_input_texture) {
+      nvenc->nvEncUnregisterResource(encoder, d3d_input_texture_reg);
+    }
+
+    if (dll) {
+      FreeLibrary(dll);
+      dll = NULL;
+    }
+  }
+
+  ID3D11Texture2D *
+  nvenc_d3d11::get_input_texture() {
+    return d3d_input_texture.GetInterfacePtr();
+  }
+
+  bool
+  nvenc_d3d11::init_library() {
+    if (dll) return true;
+
+  #ifdef _WIN64
+    auto dll_name = "nvEncodeAPI64.dll";
+  #else
+    auto dll_name = "nvEncodeAPI.dll";
+  #endif
+
+    if ((dll = LoadLibraryEx(dll_name, NULL, LOAD_LIBRARY_SEARCH_SYSTEM32))) {
+      if (auto create_instance = (decltype(NvEncodeAPICreateInstance) *) GetProcAddress(dll, "NvEncodeAPICreateInstance")) {
+        auto new_nvenc = std::make_unique<NV_ENCODE_API_FUNCTION_LIST>();
+        new_nvenc->version = NV_ENCODE_API_FUNCTION_LIST_VER;
+        if (create_instance(new_nvenc.get()) == NV_ENC_SUCCESS) {
+          nvenc = std::move(new_nvenc);
+          return true;
+        }
+        else {
+          BOOST_LOG(error) << "NvEncodeAPICreateInstance failed";
+        }
+      }
+    }
+
+    if (dll) {
+      FreeLibrary(dll);
+      dll = NULL;
+    }
+
+    return false;
+  }
+
+  bool
+  nvenc_d3d11::create_input_buffer() {
+    if (d3d_input_texture) return false;
+
+    D3D11_TEXTURE2D_DESC desc = {};
+    desc.Width = max_width;
+    desc.Height = max_height;
+    desc.MipLevels = 1;
+    desc.ArraySize = 1;
+    desc.Format = dxgi_format_from_nvenc_format(buffer_format);
+    desc.SampleDesc.Count = 1;
+    desc.Usage = D3D11_USAGE_DEFAULT;
+    desc.BindFlags = D3D11_BIND_RENDER_TARGET;
+    desc.CPUAccessFlags = 0;
+    if (d3d_device->CreateTexture2D(&desc, nullptr, &d3d_input_texture) != S_OK) {
+      BOOST_LOG(error) << "Couldn't create input texture for NvENC";
+      return false;
+    }
+
+    NV_ENC_REGISTER_RESOURCE register_resource = { NV_ENC_REGISTER_RESOURCE_VER };
+    register_resource.resourceType = NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX;
+    register_resource.width = max_width;
+    register_resource.height = max_height;
+    register_resource.resourceToRegister = d3d_input_texture.GetInterfacePtr();
+    register_resource.bufferFormat = buffer_format;
+    register_resource.bufferUsage = NV_ENC_INPUT_IMAGE;
+
+    if (nvenc->nvEncRegisterResource(encoder, &register_resource) != NV_ENC_SUCCESS) {
+      BOOST_LOG(error) << "NvEncRegisterResource failed: " << nvenc->nvEncGetLastErrorString(encoder);
+      return false;
+    }
+    d3d_input_texture_reg = register_resource.registeredResource;
+
+    return true;
+  }
+
+  NV_ENC_REGISTERED_PTR
+  nvenc_d3d11::get_input_buffer() {
+    return d3d_input_texture_reg;
+  }
+
+}  // namespace nvenc
+#endif
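Note the lifetime contract nvenc_d3d11 follows: the input texture is created once at max_width x max_height and registered with the encoder a single time, so per-frame work reduces to rendering into that texture and calling encode_frame(). A rough sketch of the handoff (device/context creation and the captured `source` texture are assumed, not shown in the patch):

```cpp
// Hedged sketch: feeding one captured D3D11 frame through nvenc_d3d11.
nvenc::nvenc_d3d11 encoder(d3d_device, 3840, 2160, NV_ENC_BUFFER_FORMAT_NV12);
if (!encoder.create_encoder(nvenc::nvenc_config {}, client_config, colorspace))
  return;

// create_encoder() has created and registered the input texture by now
ID3D11Texture2D *target = encoder.get_input_texture();
device_context->CopyResource(target, source);  // or color-convert into it, as convert() does

auto frame = encoder.encode_frame(1, true);  // first frame, forced IDR
```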
diff --git a/src/nvenc/nvenc_d3d11.h b/src/nvenc/nvenc_d3d11.h
new file mode 100644
index 00000000000..e0e022f7f20
--- /dev/null
+++ b/src/nvenc/nvenc_d3d11.h
@@ -0,0 +1,39 @@
+#pragma once
+#ifdef _WIN32
+
+  #include <comdef.h>
+  #include <d3d11.h>
+
+  #include "nvenc_base.h"
+
+namespace nvenc {
+
+  _COM_SMARTPTR_TYPEDEF(ID3D11Device, IID_ID3D11Device);
+  _COM_SMARTPTR_TYPEDEF(ID3D11Texture2D, IID_ID3D11Texture2D);
+
+  class nvenc_d3d11 final: public nvenc_base {
+  public:
+    nvenc_d3d11(ID3D11Device *d3d_device, uint32_t max_width, uint32_t max_height, NV_ENC_BUFFER_FORMAT buffer_format);
+    ~nvenc_d3d11();
+
+    ID3D11Texture2D *
+    get_input_texture();
+
+  private:
+    bool
+    init_library() override;
+
+    bool
+    create_input_buffer() override;
+
+    NV_ENC_REGISTERED_PTR
+    get_input_buffer() override;
+
+    HMODULE dll = NULL;
+    const ID3D11DevicePtr d3d_device;
+    ID3D11Texture2DPtr d3d_input_texture;
+    NV_ENC_REGISTERED_PTR d3d_input_texture_reg = nullptr;
+  };
+
+}  // namespace nvenc
+#endif
diff --git a/src/nvenc/nvenc_encoded_frame.h b/src/nvenc/nvenc_encoded_frame.h
new file mode 100644
index 00000000000..f60ba3023e7
--- /dev/null
+++ b/src/nvenc/nvenc_encoded_frame.h
@@ -0,0 +1,13 @@
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+namespace nvenc {
+  struct nvenc_encoded_frame {
+    std::vector<uint8_t> data;
+    uint64_t frame_index = 0;
+    bool idr = false;
+    bool after_ref_frame_invalidation = false;
+  };
+}  // namespace nvenc
diff --git a/src/nvenc/nvenc_utils.cpp b/src/nvenc/nvenc_utils.cpp
new file mode 100644
index 00000000000..7308f329ebf
--- /dev/null
+++ b/src/nvenc/nvenc_utils.cpp
@@ -0,0 +1,78 @@
+#include "nvenc_utils.h"
+
+namespace nvenc {
+
+#ifdef _WIN32
+  DXGI_FORMAT
+  dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format) {
+    switch (format) {
+      case NV_ENC_BUFFER_FORMAT_YUV420_10BIT:
+        return DXGI_FORMAT_P010;
+
+      case NV_ENC_BUFFER_FORMAT_NV12:
+        return DXGI_FORMAT_NV12;
+
+      default:
+        return DXGI_FORMAT_UNKNOWN;
+    }
+  }
+#endif
+
+  NV_ENC_BUFFER_FORMAT
+  nvenc_format_from_sunshine_format(platf::pix_fmt_e format) {
+    switch (format) {
+      case platf::pix_fmt_e::nv12:
+        return NV_ENC_BUFFER_FORMAT_NV12;
+
+      case platf::pix_fmt_e::p010:
+        return NV_ENC_BUFFER_FORMAT_YUV420_10BIT;
+
+      default:
+        return NV_ENC_BUFFER_FORMAT_UNDEFINED;
+    }
+  }
+
+  nvenc_colorspace_t
+  nvenc_colorspace_from_rtsp_csc(int csc, bool hdr) {
+    /* See video::config_t::encoderCscMode for details */
+
+    nvenc_colorspace_t colorspace = {};
+
+    colorspace.full_range = (csc & 0x1);
+
+    if (hdr) {
+      // Rec. 2020 + SMPTE 2084 PQ
+      colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020;
+      colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE2084;
+      colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL;
+    }
+    else {
+      switch (csc >> 1) {
+        case 0:
+        default:
+          // Rec. 601
+          colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_SMPTE170M;
+          colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE170M;
+          colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_SMPTE170M;
+          break;
+
+        case 1:
+          // Rec. 709
+          colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT709;
+          colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709;
+          colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT709;
+          break;
+
+        case 2:
+          // Rec.
2020 + colorspace.primaries = NV_ENC_VUI_COLOR_PRIMARIES_BT2020; + colorspace.tranfer_function = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT2020_10; + colorspace.matrix = NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL; + break; + } + } + + return colorspace; + } + +} // namespace nvenc diff --git a/src/nvenc/nvenc_utils.h b/src/nvenc/nvenc_utils.h new file mode 100644 index 00000000000..f3a065e853c --- /dev/null +++ b/src/nvenc/nvenc_utils.h @@ -0,0 +1,26 @@ +#pragma once + +#ifdef _WIN32 + #include +#endif + +#include "nvenc_colorspace.h" + +#include "src/platform/common.h" +#include "third-party/nvenc/nvEncodeAPI.h" + +namespace nvenc { + +#ifdef _WIN32 + DXGI_FORMAT + dxgi_format_from_nvenc_format(NV_ENC_BUFFER_FORMAT format); +#endif + + NV_ENC_BUFFER_FORMAT + nvenc_format_from_sunshine_format(platf::pix_fmt_e format); + + /* See video::config_t::encoderCscMode for details */ + nvenc_colorspace_t + nvenc_colorspace_from_rtsp_csc(int csc, bool hdr); + +} // namespace nvenc diff --git a/src/platform/common.h b/src/platform/common.h index 1173afd5ad5..b7f83c8c7df 100644 --- a/src/platform/common.h +++ b/src/platform/common.h @@ -11,10 +11,12 @@ #include #include "src/main.h" +#include "src/nvenc/nvenc_encoded_frame.h" #include "src/thread_safe.h" #include "src/utility.h" extern "C" { +#include #include } @@ -320,15 +322,57 @@ namespace platf { std::optional null; }; - struct hwdevice_t { + struct encoding_stream_t { + virtual ~encoding_stream_t() = default; + + virtual int + convert(platf::img_t &img) = 0; + + virtual void + set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) = 0; + + virtual void + request_idr_frame() = 0; + + virtual void + request_normal_frame() = 0; + + virtual void + invalidate_ref_frames(int64_t first_frame, int64_t last_frame) { + BOOST_LOG(error) << "Encoding stream doesn't support reference frame invalidation"; + } + }; + + struct avcodec_encoding_stream_t: encoding_stream_t { void *data {}; AVFrame *frame {}; - virtual int - convert(platf::img_t &img) { + int + convert(platf::img_t &img) override { return -1; } + void + set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { + // TODO: leave as pure virtual? 
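+        // (If it were pure virtual, streams that actually consume the
+        // colorspace, like the D3D11 ones, would be forced to implement it
+        // explicitly instead of silently inheriting this no-op.)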
+ } + + void + request_idr_frame() override { + if (frame) { + frame->pict_type = AV_PICTURE_TYPE_I; + frame->key_frame = 1; + } + } + + void + request_normal_frame() override { + if (frame) { + frame->pict_type = AV_PICTURE_TYPE_NONE; + frame->key_frame = 0; + } + } + /** * implementations must take ownership of 'frame' */ @@ -338,9 +382,6 @@ namespace platf { return -1; }; - virtual void - set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) {}; - /** * Implementations may set parameters during initialization of the hwframes context */ @@ -355,7 +396,13 @@ namespace platf { return 0; }; - virtual ~hwdevice_t() = default; + virtual ~avcodec_encoding_stream_t() = default; + }; + + struct nvenc_encoding_stream_t: encoding_stream_t { + using encoded_frame = nvenc::nvenc_encoded_frame; + virtual encoded_frame + encode_frame(uint64_t frame_index) = 0; }; enum class capture_e : int { @@ -416,9 +463,14 @@ namespace platf { virtual int dummy_img(img_t *img) = 0; - virtual std::shared_ptr - make_hwdevice(pix_fmt_e pix_fmt) { - return std::make_shared(); + virtual std::unique_ptr + make_avcodec_encoding_stream(pix_fmt_e pix_fmt) { + return std::make_unique(); + } + + virtual std::unique_ptr + make_nvenc_encoding_stream(const video::config_t &config, pix_fmt_e pix_fmt) { + return nullptr; } virtual bool diff --git a/src/platform/windows/display.h b/src/platform/windows/display.h index 2496cd3f55e..610225eccaf 100644 --- a/src/platform/windows/display.h +++ b/src/platform/windows/display.h @@ -13,6 +13,7 @@ #include "src/platform/common.h" #include "src/utility.h" +#include "src/video.h" namespace platf::dxgi { extern const char *format_str[]; @@ -215,8 +216,11 @@ namespace platf::dxgi { int init(const ::video::config_t &config, const std::string &display_name); - std::shared_ptr - make_hwdevice(pix_fmt_e pix_fmt) override; + std::unique_ptr + make_avcodec_encoding_stream(pix_fmt_e pix_fmt) override; + + std::unique_ptr + make_nvenc_encoding_stream(const ::video::config_t &config, pix_fmt_e pix_fmt) override; sampler_state_t sampler_linear; diff --git a/src/platform/windows/display_vram.cpp b/src/platform/windows/display_vram.cpp index 4519243a1af..2624ce4a67b 100644 --- a/src/platform/windows/display_vram.cpp +++ b/src/platform/windows/display_vram.cpp @@ -16,7 +16,11 @@ extern "C" { #include "display.h" #include "misc.h" +#include "src/config.h" #include "src/main.h" +#include "src/nvenc/nvenc_config.h" +#include "src/nvenc/nvenc_d3d11.h" +#include "src/nvenc/nvenc_utils.h" #include "src/video.h" #define SUNSHINE_SHADERS_DIR SUNSHINE_ASSETS_DIR "/shaders/directx" @@ -361,10 +365,10 @@ namespace platf::dxgi { return compile_shader(file, "main_vs", "vs_5_0"); } - class hwdevice_t: public platf::hwdevice_t { + class d3d_base_encoding_stream_t final { public: int - convert(platf::img_t &img_base) override { + convert(platf::img_t &img_base) { // Garbage collect mapped capture images whose weak references have expired for (auto it = img_ctx_map.begin(); it != img_ctx_map.end();) { if (it->second.img_weak.expired()) { @@ -413,7 +417,7 @@ namespace platf::dxgi { } void - set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { + set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) { switch (colorspace) { case 5: // SWS_CS_SMPTE170M color_p = &::video::colors[0]; @@ -434,7 +438,7 @@ namespace platf::dxgi { ++color_p; } - auto color_matrix = make_buffer((device_t::pointer) data, *color_p); + auto color_matrix = make_buffer(device.get(), *color_p); if 
(!color_matrix) { BOOST_LOG(warning) << "Failed to create color matrix"sv; return; @@ -445,78 +449,14 @@ namespace platf::dxgi { this->color_matrix = std::move(color_matrix); } - void - init_hwframes(AVHWFramesContext *frames) override { - // We may be called with a QSV or D3D11VA context - if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { - auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx; - - // The encoder requires textures with D3D11_BIND_RENDER_TARGET set - d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET; - d3d11_frames->MiscFlags = 0; - } - - // We require a single texture - frames->initial_pool_size = 1; - } - - int - prepare_to_derive_context(int hw_device_type) override { - // QuickSync requires our device to be multithread-protected - if (hw_device_type == AV_HWDEVICE_TYPE_QSV) { - multithread_t mt; - - auto status = device->QueryInterface(IID_ID3D11Multithread, (void **) &mt); - if (FAILED(status)) { - BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']'; - return -1; - } - - mt->SetMultithreadProtected(TRUE); - } - - return 0; - } - int - set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override { - this->hwframe.reset(frame); - this->frame = frame; - - // Populate this frame with a hardware buffer if one isn't there already - if (!frame->buf[0]) { - auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0); - if (err) { - char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; - BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); - return -1; - } - } - - // If this is a frame from a derived context, we'll need to map it to D3D11 - ID3D11Texture2D *frame_texture; - if (frame->format != AV_PIX_FMT_D3D11) { - frame_t d3d11_frame { av_frame_alloc() }; - - d3d11_frame->format = AV_PIX_FMT_D3D11; - - auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE); - if (err) { - char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; - BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); - return -1; - } - - // Get the texture from the mapped frame - frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0]; - } - else { - // Otherwise, we can just use the texture inside the original frame - frame_texture = (ID3D11Texture2D *) frame->data[0]; - } + init_output(ID3D11Texture2D *frame_texture, int width, int height) { + // The underlying frame pool owns the texture, so we must reference it for ourselves + frame_texture->AddRef(); + output_texture.reset(frame_texture); - auto out_width = frame->width; - auto out_height = frame->height; + auto out_width = width; + auto out_height = height; float in_width = display->width; float in_height = display->height; @@ -533,10 +473,6 @@ namespace platf::dxgi { outY_view = D3D11_VIEWPORT { offsetX, offsetY, out_width_f, out_height_f, 0.0f, 1.0f }; outUV_view = D3D11_VIEWPORT { offsetX / 2, offsetY / 2, out_width_f / 2, out_height_f / 2, 0.0f, 1.0f }; - // The underlying frame pool owns the texture, so we must reference it for ourselves - frame_texture->AddRef(); - hwframe_texture.reset(frame_texture); - float info_in[16 / sizeof(float)] { 1.0f / (float) out_width_f }; // aligned to 16-byte info_scene = make_buffer(device.get(), info_in); @@ -550,7 +486,7 @@ namespace platf::dxgi { D3D11_RTV_DIMENSION_TEXTURE2D }; - auto status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, 
&nv12_Y_rt); + auto status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_Y_rt); if (FAILED(status)) { BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; @@ -558,7 +494,7 @@ namespace platf::dxgi { nv12_rt_desc.Format = (format == DXGI_FORMAT_P010) ? DXGI_FORMAT_R16G16_UNORM : DXGI_FORMAT_R8G8_UNORM; - status = device->CreateRenderTargetView(hwframe_texture.get(), &nv12_rt_desc, &nv12_UV_rt); + status = device->CreateRenderTargetView(output_texture.get(), &nv12_rt_desc, &nv12_UV_rt); if (FAILED(status)) { BOOST_LOG(error) << "Failed to create render target view [0x"sv << util::hex(status).to_string_view() << ']'; return -1; @@ -574,9 +510,7 @@ namespace platf::dxgi { } int - init( - std::shared_ptr display, adapter_t::pointer adapter_p, - pix_fmt_e pix_fmt) { + init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) { D3D_FEATURE_LEVEL featureLevels[] { D3D_FEATURE_LEVEL_11_1, D3D_FEATURE_LEVEL_11_0, @@ -615,8 +549,6 @@ namespace platf::dxgi { BOOST_LOG(warning) << "Failed to increase encoding GPU thread priority. Please run application as administrator for optimal performance."; } - data = device.get(); - format = (pix_fmt == pix_fmt_e::nv12 ? DXGI_FORMAT_NV12 : DXGI_FORMAT_P010); status = device->CreateVertexShader(scene_vs_hlsl->GetBufferPointer(), scene_vs_hlsl->GetBufferSize(), nullptr, &scene_vs); if (status) { @@ -721,7 +653,6 @@ namespace platf::dxgi { return 0; } - private: struct encoder_img_ctx_t { // Used to determine if the underlying texture changes. // Not safe for actual use by the encoder! @@ -789,9 +720,6 @@ namespace platf::dxgi { return 0; } - public: - frame_t hwframe; - ::video::color_t *color_p; buf_t info_scene; @@ -805,9 +733,6 @@ namespace platf::dxgi { render_target_t nv12_Y_rt; render_target_t nv12_UV_rt; - // The image referenced by hwframe - texture2d_t hwframe_texture; - // d3d_img_t::id -> encoder_img_ctx_t // These store the encoder textures for each img_t that passes through // convert(). 
We can't store them in the img_t itself because it is shared @@ -830,6 +755,201 @@ namespace platf::dxgi { device_t device; device_ctx_t device_ctx; + + texture2d_t output_texture; + }; + + class d3d_avcodec_encoding_stream_t: public avcodec_encoding_stream_t { + public: + int + init(std::shared_ptr display, adapter_t::pointer adapter_p, pix_fmt_e pix_fmt) { + int result = base.init(display, adapter_p, pix_fmt); + data = base.device.get(); + return result; + } + + int + convert(platf::img_t &img_base) override { + return base.convert(img_base); + } + + void + set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { + base.set_colorspace(colorspace, color_range); + } + + void + init_hwframes(AVHWFramesContext *frames) override { + // We may be called with a QSV or D3D11VA context + if (frames->device_ctx->type == AV_HWDEVICE_TYPE_D3D11VA) { + auto d3d11_frames = (AVD3D11VAFramesContext *) frames->hwctx; + + // The encoder requires textures with D3D11_BIND_RENDER_TARGET set + d3d11_frames->BindFlags = D3D11_BIND_RENDER_TARGET; + d3d11_frames->MiscFlags = 0; + } + + // We require a single texture + frames->initial_pool_size = 1; + } + + int + prepare_to_derive_context(int hw_device_type) override { + // QuickSync requires our device to be multithread-protected + if (hw_device_type == AV_HWDEVICE_TYPE_QSV) { + multithread_t mt; + + auto status = base.device->QueryInterface(IID_ID3D11Multithread, (void **) &mt); + if (FAILED(status)) { + BOOST_LOG(warning) << "Failed to query ID3D11Multithread interface from device [0x"sv << util::hex(status).to_string_view() << ']'; + return -1; + } + + mt->SetMultithreadProtected(TRUE); + } + + return 0; + } + + int + set_frame(AVFrame *frame, AVBufferRef *hw_frames_ctx) override { + this->hwframe.reset(frame); + this->frame = frame; + + // Populate this frame with a hardware buffer if one isn't there already + if (!frame->buf[0]) { + auto err = av_hwframe_get_buffer(hw_frames_ctx, frame, 0); + if (err) { + char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; + BOOST_LOG(error) << "Failed to get hwframe buffer: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + return -1; + } + } + + // If this is a frame from a derived context, we'll need to map it to D3D11 + ID3D11Texture2D *frame_texture; + if (frame->format != AV_PIX_FMT_D3D11) { + frame_t d3d11_frame { av_frame_alloc() }; + + d3d11_frame->format = AV_PIX_FMT_D3D11; + + auto err = av_hwframe_map(d3d11_frame.get(), frame, AV_HWFRAME_MAP_WRITE | AV_HWFRAME_MAP_OVERWRITE); + if (err) { + char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 }; + BOOST_LOG(error) << "Failed to map D3D11 frame: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err); + return -1; + } + + // Get the texture from the mapped frame + frame_texture = (ID3D11Texture2D *) d3d11_frame->data[0]; + } + else { + // Otherwise, we can just use the texture inside the original frame + frame_texture = (ID3D11Texture2D *) frame->data[0]; + } + + return base.init_output(frame_texture, frame->width, frame->height); + } + + private: + d3d_base_encoding_stream_t base; + frame_t hwframe; + }; + + class d3d_nvenc_encoding_stream_t: public nvenc_encoding_stream_t { + public: + bool + init(std::shared_ptr display, adapter_t::pointer adapter_p, const ::video::config_t &client_config, pix_fmt_e pix_fmt) { + auto buffer_format = nvenc::nvenc_format_from_sunshine_format(pix_fmt); + if (buffer_format == NV_ENC_BUFFER_FORMAT_UNDEFINED) { + BOOST_LOG(error) << "Unexpected pixel format for NvENC ["sv << 
from_pix_fmt(pix_fmt) << ']'; + return false; + } + + if (base.init(display, adapter_p, pix_fmt)) return false; + + // TODO: add sunshine colorspace/primaries/tranfer/matrix constants and translate those to avcodec, nvenc, whatever + { + int color_range = (client_config.encoderCscMode & 0x1) ? 2 : 1; + int color_space; + if (client_config.dynamicRange && display->is_hdr()) { + // When HDR is active, that overrides the colorspace the client requested + color_space = 9; + } + else { + switch (client_config.encoderCscMode >> 1) { + case 0: + default: + // BT.601 + color_space = 5; + break; + + case 1: + // BT.709 + color_space = 1; + break; + + case 2: + // BT.2020 + color_space = 9; + break; + } + } + set_colorspace(color_space, color_range); + } + + nvenc = std::make_unique(base.device.get(), client_config.width, client_config.height, buffer_format); + + auto nvenc_colorspace = nvenc::nvenc_colorspace_from_rtsp_csc(client_config.encoderCscMode, client_config.dynamicRange && display->is_hdr()); + + nvenc::nvenc_config nvenc_config; + + if (!nvenc->create_encoder(nvenc_config, client_config, nvenc_colorspace)) return false; + + return base.init_output(nvenc->get_input_texture(), client_config.width, client_config.height) == 0; + } + + encoded_frame + encode_frame(uint64_t frame_index) override { + auto result = nvenc->encode_frame(frame_index, force_idr); + force_idr = false; + return result; + } + + int + convert(platf::img_t &img_base) override { + return base.convert(img_base); + } + + void + set_colorspace(std::uint32_t colorspace, std::uint32_t color_range) override { + base.set_colorspace(colorspace, color_range); + } + + void + request_idr_frame() override { + force_idr = true; + } + + void + request_normal_frame() override { + force_idr = false; + } + + void + invalidate_ref_frames(int64_t first_frame, int64_t last_frame) override { + if (!nvenc->invalidate_ref_frames(first_frame, last_frame)) { + force_idr = true; + } + } + + ~d3d_nvenc_encoding_stream_t() { + } + + private: + d3d_base_encoding_stream_t base; + std::unique_ptr nvenc; + bool force_idr = false; }; bool @@ -1464,15 +1584,15 @@ namespace platf::dxgi { }; } - std::shared_ptr - display_vram_t::make_hwdevice(pix_fmt_e pix_fmt) { + std::unique_ptr + display_vram_t::make_avcodec_encoding_stream(pix_fmt_e pix_fmt) { if (pix_fmt != platf::pix_fmt_e::nv12 && pix_fmt != platf::pix_fmt_e::p010) { BOOST_LOG(error) << "display_vram_t doesn't support pixel format ["sv << from_pix_fmt(pix_fmt) << ']'; return nullptr; } - auto hwdevice = std::make_shared(); + auto hwdevice = std::make_unique(); auto ret = hwdevice->init( shared_from_this(), @@ -1486,6 +1606,15 @@ namespace platf::dxgi { return hwdevice; } + std::unique_ptr + display_vram_t::make_nvenc_encoding_stream(const ::video::config_t &config, pix_fmt_e pix_fmt) { + auto encoding_stream = std::make_unique(); + if (!encoding_stream->init(shared_from_this(), adapter.get(), config, pix_fmt)) { + return nullptr; + } + return encoding_stream; + } + int init() { BOOST_LOG(info) << "Compiling shaders..."sv; diff --git a/src/rtsp.cpp b/src/rtsp.cpp index 28a029c6a5b..d7eb7ac0fb9 100644 --- a/src/rtsp.cpp +++ b/src/rtsp.cpp @@ -500,6 +500,10 @@ namespace rtsp_stream { ss << "sprop-parameter-sets=AAAAAU"sv << std::endl; } + if (video::last_encoder_probe_supported_invalidate_ref_frames) { + ss << "x-nv-video[0].refPicInvalidation=1"sv << std::endl; + } + for (int x = 0; x < audio::MAX_STREAM_CONFIG; ++x) { auto &stream_config = audio::stream_configs[x]; std::uint8_t 
mapping[platf::speaker::MAX_SPEAKERS];
diff --git a/src/stream.cpp b/src/stream.cpp
index 1643dadee15..e3fa73a08a2 100644
--- a/src/stream.cpp
+++ b/src/stream.cpp
@@ -355,6 +355,7 @@ namespace stream {
     int lowseq;
     udp::endpoint peer;
     safe::mail_raw_t::event_t<bool> idr_events;
+    safe::mail_raw_t::event_t<std::pair<int64_t, int64_t>> invalidate_ref_frames_events;
     std::unique_ptr<platf::deinit_t> qos;
   } video;
@@ -833,7 +834,7 @@ namespace stream {
                 << "firstFrame [" << firstFrame << ']' << std::endl
                 << "lastFrame [" << lastFrame << ']';

-        session->video.idr_events->raise(true);
+        session->video.invalidate_ref_frames_events->raise(std::make_pair(firstFrame, lastFrame));
      });

      server->map(packetTypes[IDX_INPUT_DATA], [&](session_t *session, const std::string_view &payload) {
@@ -895,29 +896,23 @@
        return;
      }

-      // Ensure compatibility with old packet type
-      std::string_view next_payload { (char *) plaintext.data(), plaintext.size() };
-      auto type = *(std::uint16_t *) next_payload.data();
+      auto type = *(std::uint16_t *) plaintext.data();
+      std::string_view next_payload { (char *) plaintext.data() + 4, plaintext.size() - 4 };

      if (type == packetTypes[IDX_ENCRYPTED]) {
        BOOST_LOG(error) << "Bad packet type [IDX_ENCRYPTED] found"sv;
-        session::stop(*session);
        return;
      }

-      // IDX_INPUT_DATA will attempt to decrypt unencrypted data, therefore we need to skip it.
-      if (type != packetTypes[IDX_INPUT_DATA]) {
+      // The IDX_INPUT_DATA callback will attempt to decrypt unencrypted data, therefore we need to pass it directly
+      if (type == packetTypes[IDX_INPUT_DATA]) {
+        plaintext.erase(std::begin(plaintext), std::begin(plaintext) + 4);
+        input::passthrough(session->input, std::move(plaintext));
+      }
+      else {
        server->call(type, session, next_payload);
-
-        return;
      }
-
-      // Ensure compatibility with IDX_INPUT_DATA
-      constexpr auto skip = sizeof(std::uint16_t) * 2;
-      plaintext.erase(std::begin(plaintext), std::begin(plaintext) + skip);
-
-      input::passthrough(session->input, std::move(plaintext));
    });

    // This thread handles latency-sensitive control messages
@@ -1124,13 +1119,14 @@
      auto session = (session_t *) packet->channel_data;
      auto lowseq = session->video.lowseq;

-      auto av_packet = packet->av_packet;
-      std::string_view payload { (char *) av_packet->data, (size_t) av_packet->size };
+      std::string_view payload { (char *) packet->data(), packet->data_size() };
      std::vector<uint8_t> payload_new;

      video_short_frame_header_t frame_header = {};
      frame_header.headerType = 0x01;  // Short header type
-      frame_header.frameType = (av_packet->flags & AV_PKT_FLAG_KEY) ? 2 : 1;
+      frame_header.frameType = packet->is_idr() ? 2 :
+                               packet->after_ref_frame_invalidation ?
5 : + 1; if (packet->frame_timestamp) { auto duration_to_latency = [](const std::chrono::steady_clock::duration &duration) { @@ -1160,7 +1156,7 @@ namespace stream { payload = { (char *) payload_new.data(), payload_new.size() }; - if (av_packet->flags & AV_PKT_FLAG_KEY) { + if (packet->is_idr() && packet->replacements) { for (auto &replacement : *packet->replacements) { auto frame_old = replacement.old; auto frame_new = replacement._new; @@ -1226,9 +1222,8 @@ namespace stream { for (int x = 0; x < packets; ++x) { auto *inspect = (video_packet_raw_t *) ¤t_payload[x * blocksize]; - auto av_packet = packet->av_packet; - inspect->packet.frameIndex = av_packet->pts; + inspect->packet.frameIndex = packet->frame_index(); inspect->packet.streamPacketIndex = ((uint32_t) lowseq + x) << 8; // Match multiFecFlags with Moonlight @@ -1264,7 +1259,7 @@ namespace stream { inspect->rtp.timestamp = util::endian::big(timestamp); inspect->packet.multiFecBlocks = (blockIndex << 4) | lastBlockIndex; - inspect->packet.frameIndex = av_packet->pts; + inspect->packet.frameIndex = packet->frame_index(); } auto peer_address = session->video.peer.address(); @@ -1286,11 +1281,11 @@ namespace stream { } } - if (av_packet->flags & AV_PKT_FLAG_KEY) { - BOOST_LOG(verbose) << "Key Frame ["sv << av_packet->pts << "] :: send ["sv << shards.size() << "] shards..."sv; + if (packet->is_idr()) { + BOOST_LOG(verbose) << "Key Frame ["sv << packet->frame_index() << "] :: send ["sv << shards.size() << "] shards..."sv; } else { - BOOST_LOG(verbose) << "Frame ["sv << av_packet->pts << "] :: send ["sv << shards.size() << "] shards..."sv << std::endl; + BOOST_LOG(verbose) << "Frame ["sv << packet->frame_index() << "] :: send ["sv << shards.size() << "] shards..."sv << std::endl; } ++blockIndex; @@ -1754,6 +1749,7 @@ namespace stream { }; session->video.idr_events = mail->event(mail::idr); + session->video.invalidate_ref_frames_events = mail->event>(mail::invalidate_ref_frames); session->video.lowseq = 0; constexpr auto max_block_size = crypto::cipher::round_to_pkcs7_padded(2048); diff --git a/src/video.cpp b/src/video.cpp index 33649c18738..d7f63ee7a44 100644 --- a/src/video.cpp +++ b/src/video.cpp @@ -7,6 +7,8 @@ #include #include +#include + extern "C" { #include #include @@ -44,9 +46,9 @@ namespace video { av_buffer_unref(&ref); } - using ctx_t = util::safe_ptr; - using frame_t = util::safe_ptr; - using buffer_t = util::safe_ptr; + using avcodec_ctx_t = util::safe_ptr; + using avcodec_frame_t = util::safe_ptr; + using avcodec_buffer_t = util::safe_ptr; using sws_t = util::safe_ptr; using img_event_t = std::shared_ptr>>; @@ -85,17 +87,14 @@ namespace video { platf::pix_fmt_e map_pix_fmt(AVPixelFormat fmt); - util::Either - dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); - util::Either - vaapi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); - util::Either - cuda_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx); - - int - hwframe_ctx(ctx_t &ctx, platf::hwdevice_t *hwdevice, buffer_t &hwdevice_ctx, AVPixelFormat format); + util::Either + dxgi_init_avcodec_hardware_input_buffer(platf::avcodec_encoding_stream_t *); + util::Either + vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encoding_stream_t *); + util::Either + cuda_init_avcodec_hardware_input_buffer(platf::avcodec_encoding_stream_t *); - class swdevice_t: public platf::hwdevice_t { + class software_avcodec_encoding_stream_t: public platf::avcodec_encoding_stream_t { public: int convert(platf::img_t &img) override { @@ -247,12 +246,12 @@ namespace video { return 
sws ? 0 : -1; } - ~swdevice_t() override {} + ~software_avcodec_encoding_stream_t() override {} // Store ownership when frame is hw_frame - frame_t hw_frame; + avcodec_frame_t hw_frame; - frame_t sw_frame; + avcodec_frame_t sw_frame; sws_t sws; // offset of input image to output frame in pixels @@ -271,6 +270,53 @@ namespace video { NO_RC_BUF_LIMIT = 0x40, // Don't set rc_buffer_size }; + struct encoder_details_t { + virtual ~encoder_details_t() = default; + platf::mem_type_e dev_type; + platf::pix_fmt_e static_pix_fmt, dynamic_pix_fmt; + bool supports_invalidate_ref_frames = false; + }; + + struct encoder_details_avcodec: encoder_details_t { + using init_buffer_function_t = std::function(platf::avcodec_encoding_stream_t *)>; + + encoder_details_avcodec( + const AVHWDeviceType &avcodec_base_dev_type, + const AVHWDeviceType &avcodec_derived_dev_type, + const AVPixelFormat &avcodec_dev_pix_fmt, + const AVPixelFormat &avcodec_static_pix_fmt, + const AVPixelFormat &avcodec_dynamic_pix_fmt, + const init_buffer_function_t &init_avcodec_hardware_input_buffer_function): + avcodec_base_dev_type { avcodec_base_dev_type }, + avcodec_derived_dev_type { avcodec_derived_dev_type }, + avcodec_dev_pix_fmt { avcodec_dev_pix_fmt }, + avcodec_static_pix_fmt { avcodec_static_pix_fmt }, + avcodec_dynamic_pix_fmt { avcodec_dynamic_pix_fmt }, + init_avcodec_hardware_input_buffer { init_avcodec_hardware_input_buffer_function } { + dev_type = map_base_dev_type(avcodec_base_dev_type); + static_pix_fmt = map_pix_fmt(avcodec_static_pix_fmt); + dynamic_pix_fmt = map_pix_fmt(avcodec_dynamic_pix_fmt); + } + + AVHWDeviceType avcodec_base_dev_type, avcodec_derived_dev_type; + AVPixelFormat avcodec_dev_pix_fmt; + AVPixelFormat avcodec_static_pix_fmt, avcodec_dynamic_pix_fmt; + + init_buffer_function_t init_avcodec_hardware_input_buffer; + }; + + struct encoder_details_nvenc: encoder_details_t { + encoder_details_nvenc( + const platf::mem_type_e &dev_type, + const platf::pix_fmt_e &static_pix_fmt, + const platf::pix_fmt_e &dynamic_pix_fmt) { + encoder_details_t::dev_type = dev_type; + encoder_details_t::static_pix_fmt = static_pix_fmt; + encoder_details_t::dynamic_pix_fmt = dynamic_pix_fmt; + encoder_details_t::supports_invalidate_ref_frames = true; + } + }; + struct encoder_t { std::string_view name; enum flag_e { @@ -311,10 +357,7 @@ namespace video { name { std::move(name) }, value { std::move(value) } {} }; - AVHWDeviceType base_dev_type, derived_dev_type; - AVPixelFormat dev_pix_fmt; - - AVPixelFormat static_pix_fmt, dynamic_pix_fmt; + const std::unique_ptr details; struct { std::vector common_options; @@ -337,28 +380,26 @@ namespace video { } hevc, h264; int flags; - - std::function(platf::hwdevice_t *hwdevice)> make_hwdevice_ctx; }; class session_t { public: session_t() = default; - session_t(ctx_t &&ctx, std::shared_ptr &&device, int inject): - ctx { std::move(ctx) }, device { std::move(device) }, inject { inject } {} + session_t(avcodec_ctx_t &&avcodec_ctx, std::unique_ptr encoding_stream, int inject): + avcodec_ctx { std::move(avcodec_ctx) }, encoding_stream { std::move(encoding_stream) }, inject { inject } {} session_t(session_t &&other) noexcept = default; ~session_t() { // Order matters here because the context relies on the hwdevice still being valid - ctx.reset(); - device.reset(); + avcodec_ctx.reset(); + encoding_stream.reset(); } // Ensure objects are destroyed in the correct order session_t & operator=(session_t &&other) { - device = std::move(other.device); - ctx = std::move(other.ctx); + encoding_stream 
= std::move(other.encoding_stream); + avcodec_ctx = std::move(other.avcodec_ctx); replacements = std::move(other.replacements); sps = std::move(other.sps); vps = std::move(other.vps); @@ -368,8 +409,8 @@ namespace video { return *this; } - ctx_t ctx; - std::shared_ptr device; + avcodec_ctx_t avcodec_ctx; + std::unique_ptr encoding_stream; std::vector replacements; @@ -433,16 +474,51 @@ namespace video { auto capture_thread_async = safe::make_shared(start_capture_async, end_capture_async); auto capture_thread_sync = safe::make_shared(start_capture_sync, end_capture_sync); + static encoder_t nvenc_standalone { + "nvenc-standalone"sv, + std::make_unique( + platf::mem_type_e::dxgi, + platf::pix_fmt_e::nv12, platf::pix_fmt_e::p010), + { + // Common options + {}, + // SDR-specific options + {}, + // HDR-specific options + {}, + std::nullopt, // QP + "hevc_nvenc-standalone"s, + }, + { + // Common options + {}, + // SDR-specific options + {}, + // HDR-specific options + {}, + std::nullopt, // QP + "h264_nvenc-standalone"s, + }, + PARALLEL_ENCODING // flags + }; + static encoder_t nvenc { "nvenc"sv, + std::make_unique( +#ifdef _WIN32 + AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_D3D11, +#else + AV_HWDEVICE_TYPE_CUDA, AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_CUDA, +#endif + AV_PIX_FMT_NV12, AV_PIX_FMT_P010, #ifdef _WIN32 - AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_NONE, - AV_PIX_FMT_D3D11, + dxgi_init_avcodec_hardware_input_buffer #else - AV_HWDEVICE_TYPE_CUDA, AV_HWDEVICE_TYPE_NONE, - AV_PIX_FMT_CUDA, + cuda_init_avcodec_hardware_input_buffer #endif - AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + ), { // Common options { @@ -482,22 +558,17 @@ namespace video { std::make_optional({ "qp"s, &config::video.qp }), "h264_nvenc"s, }, - PARALLEL_ENCODING, -#ifdef _WIN32 - dxgi_make_hwdevice_ctx -#else - cuda_make_hwdevice_ctx -#endif + PARALLEL_ENCODING }; #ifdef _WIN32 static encoder_t quicksync { "quicksync"sv, - AV_HWDEVICE_TYPE_D3D11VA, - AV_HWDEVICE_TYPE_QSV, - AV_PIX_FMT_QSV, - AV_PIX_FMT_NV12, - AV_PIX_FMT_P010, + std::make_unique( + AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_QSV, + AV_PIX_FMT_QSV, + AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + dxgi_init_avcodec_hardware_input_buffer), { // Common options { @@ -542,15 +613,16 @@ namespace video { std::make_optional({ "qp"s, &config::video.qp }), "h264_qsv"s, }, - PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT, - dxgi_make_hwdevice_ctx, + PARALLEL_ENCODING | CBR_WITH_VBR | RELAXED_COMPLIANCE | NO_RC_BUF_LIMIT }; static encoder_t amdvce { "amdvce"sv, - AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_NONE, - AV_PIX_FMT_D3D11, - AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + std::make_unique( + AV_HWDEVICE_TYPE_D3D11VA, AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_D3D11, + AV_PIX_FMT_NV12, AV_PIX_FMT_P010, + dxgi_init_avcodec_hardware_input_buffer), { // Common options { @@ -588,16 +660,17 @@ namespace video { std::make_optional({ "qp_p"s, &config::video.qp }), "h264_amf"s, }, - PARALLEL_ENCODING, - dxgi_make_hwdevice_ctx + PARALLEL_ENCODING }; #endif static encoder_t software { "software"sv, - AV_HWDEVICE_TYPE_NONE, AV_HWDEVICE_TYPE_NONE, - AV_PIX_FMT_NONE, - AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, + std::make_unique( + AV_HWDEVICE_TYPE_NONE, AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_NONE, + AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV420P10, + nullptr), { // x265's Info SEI is so long that it causes the IDR picture data to be // kicked to the 2nd packet in the frame, breaking Moonlight's parsing logic. 
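Worth spelling out, since it is the core of this refactor: encoder_t no longer hard-codes avcodec fields; it owns a polymorphic encoder_details_t, and the avcodec-specific members only exist on encoder_details_avcodec. Call sites that need them downcast, while generic code (display probing, pixel-format selection) stays on the base. A condensed sketch of the dispatch (the `stream` name is illustrative):

```cpp
// Hedged sketch: how session creation branches on the details type.
if (auto avc = dynamic_cast<encoder_details_avcodec *>(encoder.details.get())) {
  // avcodec path: hardware device/pixel formats plus the input-buffer factory
  auto buf_or_error = avc->init_avcodec_hardware_input_buffer(stream);
  // ... build the AVCodecContext as in make_session_avcodec() ...
}
else {
  // nvenc-standalone path: only the base fields are needed
  // (dev_type, static/dynamic pix_fmt, supports_invalidate_ref_frames)
}
```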
@@ -625,17 +698,17 @@ namespace video { std::make_optional("qp"s, &config::video.qp), "libx264"s, }, - H264_ONLY | PARALLEL_ENCODING, - - nullptr + H264_ONLY | PARALLEL_ENCODING }; #ifdef __linux__ static encoder_t vaapi { "vaapi"sv, - AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_NONE, - AV_PIX_FMT_VAAPI, - AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P10, + std::make_unique( + AV_HWDEVICE_TYPE_VAAPI, AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_VAAPI, + AV_PIX_FMT_NV12, AV_PIX_FMT_YUV420P10, + vaapi_init_avcodec_hardware_input_buffer), { // Common options { @@ -660,18 +733,18 @@ namespace video { std::make_optional("qp"s, &config::video.qp), "h264_vaapi"s, }, - LIMITED_GOP_SIZE | PARALLEL_ENCODING | SINGLE_SLICE_ONLY | NO_RC_BUF_LIMIT, - - vaapi_make_hwdevice_ctx + LIMITED_GOP_SIZE | PARALLEL_ENCODING | SINGLE_SLICE_ONLY | NO_RC_BUF_LIMIT }; #endif #ifdef __APPLE__ static encoder_t videotoolbox { "videotoolbox"sv, - AV_HWDEVICE_TYPE_NONE, AV_HWDEVICE_TYPE_NONE, - AV_PIX_FMT_VIDEOTOOLBOX, - AV_PIX_FMT_NV12, AV_PIX_FMT_NV12, + std::make_unique( + AV_HWDEVICE_TYPE_NONE, AV_HWDEVICE_TYPE_NONE, + AV_PIX_FMT_VIDEOTOOLBOX, + AV_PIX_FMT_NV12, AV_PIX_FMT_NV12, + nullptr), { // Common options { @@ -696,13 +769,14 @@ namespace video { std::nullopt, "h264_videotoolbox"s, }, - DEFAULT, - - nullptr + DEFAULT }; #endif static const std::vector encoders { +#ifdef _WIN32 + &nvenc_standalone, +#endif #ifndef __APPLE__ &nvenc, #endif @@ -721,13 +795,14 @@ namespace video { static encoder_t *chosen_encoder; int active_hevc_mode; + bool last_encoder_probe_supported_invalidate_ref_frames = false; void - reset_display(std::shared_ptr &disp, AVHWDeviceType type, const std::string &display_name, const config_t &config) { + reset_display(std::shared_ptr &disp, const platf::mem_type_e &type, const std::string &display_name, const config_t &config) { // We try this twice, in case we still get an error on reinitialization for (int x = 0; x < 2; ++x) { disp.reset(); - disp = platf::display(map_base_dev_type(type), display_name, config); + disp = platf::display(type, display_name, config); if (disp) { break; } @@ -761,7 +836,7 @@ namespace video { // Get all the monitor names now, rather than at boot, to // get the most up-to-date list available monitors - auto display_names = platf::display_names(map_base_dev_type(encoder.base_dev_type)); + auto display_names = platf::display_names(encoder.details->dev_type); int display_p = 0; if (display_names.empty()) { @@ -783,7 +858,7 @@ namespace video { } capture_ctxs.emplace_back(std::move(*initial_capture_ctx)); - auto disp = platf::display(map_base_dev_type(encoder.base_dev_type), display_names[display_p], capture_ctxs.front().config); + auto disp = platf::display(encoder.details->dev_type, display_names[display_p], capture_ctxs.front().config); if (!disp) { return; } @@ -967,7 +1042,7 @@ namespace video { while (capture_ctx_queue->running()) { // reset_display() will sleep between retries - reset_display(disp, encoder.base_dev_type, display_names[display_p], capture_ctxs.front().config); + reset_display(disp, encoder.details->dev_type, display_names[display_p], capture_ctxs.front().config); if (disp) { break; } @@ -994,10 +1069,10 @@ namespace video { } int - encode(int64_t frame_nr, session_t &session, frame_t::pointer frame, safe::mail_raw_t::queue_t &packets, void *channel_data, const std::optional &frame_timestamp) { + encode_avcodec(int64_t frame_nr, session_t &session, avcodec_frame_t::pointer frame, safe::mail_raw_t::queue_t &packets, void *channel_data, std::optional frame_timestamp) { 
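      // avcodec path: stamp the frame number into pts so it rides through the
      // codec and comes back attached to each AVPacket (read back as frameIndex)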
frame->pts = frame_nr; - auto &ctx = session.ctx; + auto &ctx = session.avcodec_ctx; auto &sps = session.sps; auto &vps = session.vps; @@ -1012,7 +1087,7 @@ namespace video { } while (ret >= 0) { - auto packet = std::make_unique(nullptr); + auto packet = std::make_unique(); auto av_packet = packet.get()->av_packet; ret = avcodec_receive_packet(ctx.get(), av_packet); @@ -1059,9 +1134,48 @@ namespace video { return 0; } + int + encode_nvenc(int64_t frame_nr, session_t &session, platf::nvenc_encoding_stream_t &encoding_stream, safe::mail_raw_t::queue_t &packets, void *channel_data, std::optional frame_timestamp) { + auto encoded_frame = encoding_stream.encode_frame(frame_nr); + if (encoded_frame.data.empty()) { + BOOST_LOG(error) << "NvENC returned empty packet"; + return -1; + } + + if (frame_nr != encoded_frame.frame_index) { + BOOST_LOG(error) << "NvENC frame index mismatch " << frame_nr << " " << encoded_frame.frame_index; + } + + auto packet = std::make_unique(std::move(encoded_frame.data), encoded_frame.frame_index, encoded_frame.idr); + packet->replacements = &session.replacements; + packet->channel_data = channel_data; + packet->after_ref_frame_invalidation = encoded_frame.after_ref_frame_invalidation; + packet->frame_timestamp = frame_timestamp; + packets->raise(std::move(packet)); + + return 0; + } + + int + encode(int64_t frame_nr, session_t &session, safe::mail_raw_t::queue_t &packets, void *channel_data, std::optional frame_timestamp) { + if (auto avcodec_encoding_stream = dynamic_cast(session.encoding_stream.get())) { + return encode_avcodec(frame_nr, session, avcodec_encoding_stream->frame, packets, channel_data, frame_timestamp); + } + else if (auto nvenc_encoding_stream = dynamic_cast(session.encoding_stream.get())) { + return encode_nvenc(frame_nr, session, *nvenc_encoding_stream, packets, channel_data, frame_timestamp); + } + + return -1; + } + std::optional - make_session(platf::display_t *disp, const encoder_t &encoder, const config_t &config, int width, int height, std::shared_ptr &&hwdevice) { - bool hardware = encoder.base_dev_type != AV_HWDEVICE_TYPE_NONE; + make_session_avcodec(platf::display_t *disp, const encoder_t &encoder, const config_t &config, int width, int height, std::unique_ptr encoding_stream) { + auto encoder_details = dynamic_cast(encoder.details.get()); + if (!encoder_details) { + return std::nullopt; + } + + bool hardware = encoder_details->avcodec_base_dev_type != AV_HWDEVICE_TYPE_NONE; auto &video_format = config.videoFormat == 0 ? 
   std::optional<session_t>
-  make_session(platf::display_t *disp, const encoder_t &encoder, const config_t &config, int width, int height, std::shared_ptr<platf::hwdevice_t> &&hwdevice) {
-    bool hardware = encoder.base_dev_type != AV_HWDEVICE_TYPE_NONE;
+  make_session_avcodec(platf::display_t *disp, const encoder_t &encoder, const config_t &config, int width, int height, std::unique_ptr<platf::avcodec_encoding_stream_t> encoding_stream) {
+    auto encoder_details = dynamic_cast<avcodec_encoder_details_t *>(encoder.details.get());
+    if (!encoder_details) {
+      return std::nullopt;
+    }
+
+    bool hardware = encoder_details->avcodec_base_dev_type != AV_HWDEVICE_TYPE_NONE;
 
     auto &video_format = config.videoFormat == 0 ? encoder.h264 : encoder.hevc;
     if (!video_format[encoder_t::PASSED]) {
@@ -1081,7 +1195,7 @@
       return std::nullopt;
     }
 
-    ctx_t ctx { avcodec_alloc_context3(codec) };
+    avcodec_ctx_t ctx { avcodec_alloc_context3(codec) };
     ctx->width = config.width;
     ctx->height = config.height;
     ctx->time_base = AVRational { 1, config.framerate };
@@ -1167,37 +1281,37 @@
 
     AVPixelFormat sw_fmt;
     if (config.dynamicRange == 0) {
-      sw_fmt = encoder.static_pix_fmt;
+      sw_fmt = encoder_details->avcodec_static_pix_fmt;
     }
     else {
-      sw_fmt = encoder.dynamic_pix_fmt;
+      sw_fmt = encoder_details->avcodec_dynamic_pix_fmt;
     }
 
     // Used by cbs::make_sps_hevc
     ctx->sw_pix_fmt = sw_fmt;
 
     if (hardware) {
-      buffer_t hwdevice_ctx;
+      avcodec_buffer_t encoding_stream_context;
 
-      ctx->pix_fmt = encoder.dev_pix_fmt;
+      ctx->pix_fmt = encoder_details->avcodec_dev_pix_fmt;
 
       // Create the base hwdevice context
-      auto buf_or_error = encoder.make_hwdevice_ctx(hwdevice.get());
+      auto buf_or_error = encoder_details->init_avcodec_hardware_input_buffer(encoding_stream.get());
       if (buf_or_error.has_right()) {
         return std::nullopt;
       }
-      hwdevice_ctx = std::move(buf_or_error.left());
+      encoding_stream_context = std::move(buf_or_error.left());
 
       // If this encoder requires derivation from the base, derive the desired type
-      if (encoder.derived_dev_type != AV_HWDEVICE_TYPE_NONE) {
-        buffer_t derived_hwdevice_ctx;
+      if (encoder_details->avcodec_derived_dev_type != AV_HWDEVICE_TYPE_NONE) {
+        avcodec_buffer_t derived_context;
 
         // Allow the hwdevice to prepare for this type of context to be derived
-        if (hwdevice->prepare_to_derive_context(encoder.derived_dev_type)) {
+        if (encoding_stream->prepare_to_derive_context(encoder_details->avcodec_derived_dev_type)) {
           return std::nullopt;
         }
 
-        auto err = av_hwdevice_ctx_create_derived(&derived_hwdevice_ctx, encoder.derived_dev_type, hwdevice_ctx.get(), 0);
+        auto err = av_hwdevice_ctx_create_derived(&derived_context, encoder_details->avcodec_derived_dev_type, encoding_stream_context.get(), 0);
         if (err) {
           char err_str[AV_ERROR_MAX_STRING_SIZE] { 0 };
           BOOST_LOG(error) << "Failed to derive device context: "sv << av_make_error_string(err_str, AV_ERROR_MAX_STRING_SIZE, err);
@@ -1205,11 +1319,28 @@
           return std::nullopt;
         }
 
-        hwdevice_ctx = std::move(derived_hwdevice_ctx);
+        encoding_stream_context = std::move(derived_context);
       }
 
-      if (hwframe_ctx(ctx, hwdevice.get(), hwdevice_ctx, sw_fmt)) {
-        return std::nullopt;
+      // Initialize avcodec hardware frames
+      {
+        avcodec_buffer_t frame_ref { av_hwframe_ctx_alloc(encoding_stream_context.get()) };
+
+        auto frame_ctx = (AVHWFramesContext *) frame_ref->data;
+        frame_ctx->format = ctx->pix_fmt;
+        frame_ctx->sw_format = sw_fmt;
+        frame_ctx->height = ctx->height;
+        frame_ctx->width = ctx->width;
+        frame_ctx->initial_pool_size = 0;
+
+        // Allow the hwdevice to modify hwframe context parameters
+        encoding_stream->init_hwframes(frame_ctx);
+
+        if (auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
+          return std::nullopt;
+        }
+
+        ctx->hw_frames_ctx = av_buffer_ref(frame_ref.get());
       }
 
     ctx->slices = config.slicesPerFrame;
@@ -1298,7 +1429,7 @@
       return std::nullopt;
     }
 
-    frame_t frame { av_frame_alloc() };
+    avcodec_frame_t frame { av_frame_alloc() };
     frame->format = ctx->pix_fmt;
     frame->width = ctx->width;
     frame->height = ctx->height;
@@ -1334,30 +1465,30 @@
       }
     }
 
-    std::shared_ptr<platf::hwdevice_t> device;
+    std::unique_ptr<platf::avcodec_encoding_stream_t> encoding_stream_final;
 
-    if (!hwdevice->data) {
-      auto device_tmp = std::make_unique<swdevice_t>();
+    if (!encoding_stream->data) {
+      auto software_encoding_stream = std::make_unique<swdevice_t>();
 
-      if (device_tmp->init(width, height, frame.get(), sw_fmt, hardware)) {
+      if (software_encoding_stream->init(width, height, frame.get(), sw_fmt, hardware)) {
         return std::nullopt;
       }
 
-      device = std::move(device_tmp);
+      encoding_stream_final = std::move(software_encoding_stream);
     }
     else {
-      device = std::move(hwdevice);
+      encoding_stream_final = std::move(encoding_stream);
     }
 
-    if (device->set_frame(frame.release(), ctx->hw_frames_ctx)) {
+    if (encoding_stream_final->set_frame(frame.release(), ctx->hw_frames_ctx)) {
       return std::nullopt;
     }
 
-    device->set_colorspace(sws_color_space, ctx->color_range);
+    encoding_stream_final->set_colorspace(sws_color_space, ctx->color_range);
 
     session_t session {
       std::move(ctx),
-      std::move(device),
+      std::move(encoding_stream_final),
 
       // 0 ==> don't inject, 1 ==> inject for h264, 2 ==> inject for hevc
       (1 - (int) video_format[encoder_t::VUI_PARAMETERS]) * (1 + config.videoFormat),
 
@@ -1366,6 +1497,19 @@
     return std::make_optional(std::move(session));
   }
 
+  std::optional<session_t>
+  make_session(platf::display_t *disp, const encoder_t &encoder, const config_t &config, int width, int height, std::unique_ptr<platf::encoding_stream_t> encoding_stream) {
+    if (dynamic_cast<platf::avcodec_encoding_stream_t *>(encoding_stream.get())) {
+      auto avcodec_encoding_stream = boost::dynamic_pointer_cast<platf::avcodec_encoding_stream_t>(std::move(encoding_stream));
+      return make_session_avcodec(disp, encoder, config, width, height, std::move(avcodec_encoding_stream));
+    }
+    else if (dynamic_cast<platf::nvenc_encoding_stream_t *>(encoding_stream.get())) {
+      return session_t(nullptr, std::move(encoding_stream), 0);
+    }
+
+    return std::nullopt;
+  }
+
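The VUI-injection selector passed to `session_t` above packs two decisions into one integer; spelled out as a worked example:

  // 0 = don't inject, 1 = inject SPS/PPS VUI for H.264, 2 = inject for HEVC.
  int vui_injection_mode(bool encoder_emits_vui, int videoFormat /* 0 - h264, 1 - hevc */) {
    return (1 - (int) encoder_emits_vui) * (1 + videoFormat);
    // emits_vui == true            -> 0 (nothing to inject)
    // emits_vui == false, h264 (0) -> 1
    // emits_vui == false, hevc (1) -> 2
  }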
   void
   encode_run(
     int &frame_nr,  // Store progress of the frame number
     safe::mail_t mail,
     img_event_t images,
     config_t config,
     std::shared_ptr<platf::display_t> disp,
-    std::shared_ptr<platf::hwdevice_t> &&hwdevice,
+    std::unique_ptr<platf::encoding_stream_t> encoding_stream,
     safe::signal_t &reinit_event,
     const encoder_t &encoder,
     void *channel_data) {
-    auto session = make_session(disp.get(), encoder, config, disp->width, disp->height, std::move(hwdevice));
+    auto session = make_session(disp.get(), encoder, config, disp->width, disp->height, std::move(encoding_stream));
     if (!session) {
       return;
     }
 
-    auto frame = session->device->frame;
-
     auto shutdown_event = mail->event<bool>(mail::shutdown);
     auto packets = mail::man->queue(mail::video_packets);
     auto idr_events = mail->event<bool>(mail::idr);
+    auto invalidate_ref_frames_events = mail->event<std::pair<int64_t, int64_t>>(mail::invalidate_ref_frames);
 
     {
       // Load a dummy image into the AVFrame to ensure we have something to encode
       // even if we timeout waiting on the first frame. This is a relatively large
       // allocation which can be freed immediately after convert(), so we do this
       // in a separate scope.
       auto dummy_img = disp->alloc_img();
-      if (!dummy_img || disp->dummy_img(dummy_img.get()) || session->device->convert(*dummy_img)) {
+      if (!dummy_img || disp->dummy_img(dummy_img.get()) || session->encoding_stream->convert(*dummy_img)) {
         return;
       }
     }
 
@@ -1404,20 +1547,35 @@
         break;
       }
 
-      if (idr_events->peek()) {
-        frame->pict_type = AV_PICTURE_TYPE_I;
-        frame->key_frame = 1;
+      bool requested_idr_frame = false;
+
+      while (invalidate_ref_frames_events->peek()) {
+        if (auto frames = invalidate_ref_frames_events->pop(0ms)) {
+          if (encoder.details->supports_invalidate_ref_frames) {
+            session->encoding_stream->invalidate_ref_frames(frames->first, frames->second);
+          }
+          else {
+            requested_idr_frame = true;
+          }
+        }
+      }
 
+      if (idr_events->peek()) {
+        requested_idr_frame = true;
         idr_events->pop();
       }
 
+      if (requested_idr_frame) {
+        session->encoding_stream->request_idr_frame();
+      }
+
       std::optional<std::chrono::steady_clock::time_point> frame_timestamp;
 
       // Encode at a minimum of 10 FPS to avoid image quality issues with static content
-      if (!frame->key_frame || images->peek()) {
+      if (!requested_idr_frame || images->peek()) {
         if (auto img = images->pop(100ms)) {
           frame_timestamp = img->frame_timestamp;
-          if (session->device->convert(*img)) {
+          if (session->encoding_stream->convert(*img)) {
             BOOST_LOG(error) << "Could not convert image"sv;
             return;
           }
@@ -1427,13 +1585,12 @@
         }
       }
 
-      if (encode(frame_nr++, *session, frame, packets, channel_data, frame_timestamp)) {
+      if (encode(frame_nr++, *session, packets, channel_data, frame_timestamp)) {
        BOOST_LOG(error) << "Could not encode video packet"sv;
        return;
      }
 
-      frame->pict_type = AV_PICTURE_TYPE_NONE;
-      frame->key_frame = 0;
+      session->encoding_stream->request_normal_frame();
     }
   }
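The loop above drains every queued invalidation request and either forwards the frame range to the encoder or, when `supports_invalidate_ref_frames` is false, degrades to a forced IDR. A hedged sketch of the producer side, assuming the event payload is a `std::pair<int64_t, int64_t>` of first/last frame numbers (which matches the `frames->first`/`frames->second` usage above):

  // Hypothetical producer: ask the encoder to stop referencing frames
  // [first_frame, last_frame], e.g. after the client reports packet loss.
  void request_ref_frame_invalidation(safe::mail_t &mail, int64_t first_frame, int64_t last_frame) {
    mail->event<std::pair<int64_t, int64_t>>(mail::invalidate_ref_frames)
      ->raise(std::make_pair(first_frame, last_frame));
  }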
@@ -1468,15 +1625,27 @@
     };
   }
 
+  std::unique_ptr<platf::encoding_stream_t>
+  make_encoding_stream(platf::display_t &disp, const encoder_t &encoder, const config_t &config, platf::pix_fmt_e pix_fmt) {
+    if (dynamic_cast<avcodec_encoder_details_t *>(encoder.details.get())) {
+      return disp.make_avcodec_encoding_stream(pix_fmt);
+    }
+    else if (dynamic_cast<nvenc_encoder_details_t *>(encoder.details.get())) {
+      return disp.make_nvenc_encoding_stream(config, pix_fmt);
+    }
+
+    return nullptr;
+  }
+
   std::optional<sync_session_t>
   make_synced_session(platf::display_t *disp, const encoder_t &encoder, platf::img_t &img, sync_session_ctx_t &ctx) {
     sync_session_t encode_session;
     encode_session.ctx = &ctx;
 
-    auto pix_fmt = ctx.config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
-    auto hwdevice = disp->make_hwdevice(pix_fmt);
-    if (!hwdevice) {
+    auto pix_fmt = ctx.config.dynamicRange == 0 ? encoder.details->static_pix_fmt : encoder.details->dynamic_pix_fmt;
+    auto encoding_stream = make_encoding_stream(*disp, encoder, ctx.config, pix_fmt);
+    if (!encoding_stream) {
       return std::nullopt;
     }
 
@@ -1491,7 +1660,7 @@
     }
     ctx.hdr_events->raise(std::move(hdr_info));
 
-    auto session = make_session(disp, encoder, ctx.config, img.width, img.height, std::move(hwdevice));
+    auto session = make_session(disp, encoder, ctx.config, img.width, img.height, std::move(encoding_stream));
     if (!session) {
       return std::nullopt;
     }
@@ -1506,7 +1675,7 @@
     std::vector<std::unique_ptr<sync_session_ctx_t>> &synced_session_ctxs,
     encode_session_ctx_queue_t &encode_session_ctx_queue) {
     const auto &encoder = *chosen_encoder;
-    auto display_names = platf::display_names(map_base_dev_type(encoder.base_dev_type));
+    auto display_names = platf::display_names(encoder.details->dev_type);
     int display_p = 0;
 
     if (display_names.empty()) {
@@ -1536,7 +1705,7 @@
 
     while (encode_session_ctx_queue.running()) {
       // reset_display() will sleep between retries
-      reset_display(disp, encoder.base_dev_type, display_names[display_p], synced_session_ctxs.front()->config);
+      reset_display(disp, encoder.details->dev_type, display_names[display_p], synced_session_ctxs.front()->config);
       if (disp) {
         break;
       }
@@ -1582,7 +1751,6 @@
     }
 
     KITTY_WHILE_LOOP(auto pos = std::begin(synced_sessions), pos != std::end(synced_sessions), {
-      auto frame = pos->session.device->frame;
       auto ctx = pos->ctx;
       if (ctx->shutdown_event->peek()) {
         // Let waiting thread know it can delete shutdown_event
@@ -1601,13 +1769,11 @@
       }
 
       if (ctx->idr_events->peek()) {
-        frame->pict_type = AV_PICTURE_TYPE_I;
-        frame->key_frame = 1;
-
+        pos->session.encoding_stream->request_idr_frame();
         ctx->idr_events->pop();
       }
 
-      if (frame_captured && pos->session.device->convert(*img)) {
+      if (frame_captured && pos->session.encoding_stream->convert(*img)) {
         BOOST_LOG(error) << "Could not convert image"sv;
         ctx->shutdown_event->raise(true);
 
@@ -1619,15 +1785,14 @@
         frame_timestamp = img->frame_timestamp;
       }
 
-      if (encode(ctx->frame_nr++, pos->session, frame, ctx->packets, ctx->channel_data, frame_timestamp)) {
+      if (encode(ctx->frame_nr++, pos->session, ctx->packets, ctx->channel_data, frame_timestamp)) {
         BOOST_LOG(error) << "Could not encode video packet"sv;
         ctx->shutdown_event->raise(true);
 
         continue;
       }
 
-      frame->pict_type = AV_PICTURE_TYPE_NONE;
-      frame->key_frame = 0;
+      pos->session.encoding_stream->request_normal_frame();
 
       ++pos;
     })
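Both encode loops now express per-frame intent through `request_idr_frame()`/`request_normal_frame()` instead of poking `AVFrame::pict_type` and `key_frame` directly. A minimal sketch of the contract as the loops use it, a one-frame latch reset after every encode (illustrative, not the platform implementation):

  // Illustrative latch; an implementation consults next_frame_is_idr()
  // when submitting the next frame to its encoder.
  class sketch_encoding_stream {
  public:
    void request_idr_frame() { force_idr = true; }  // next encoded frame becomes an IDR
    void request_normal_frame() { force_idr = false; }  // called after every encode
    bool next_frame_is_idr() const { return force_idr; }

  private:
    bool force_idr = false;
  };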
@@ -1739,9 +1904,9 @@
     }
 
     auto &encoder = *chosen_encoder;
-    auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
-    auto hwdevice = display->make_hwdevice(pix_fmt);
-    if (!hwdevice) {
+    auto pix_fmt = config.dynamicRange == 0 ? encoder.details->static_pix_fmt : encoder.details->dynamic_pix_fmt;
+    auto encoding_stream = make_encoding_stream(*display, encoder, config, pix_fmt);
+    if (!encoding_stream) {
       return;
     }
 
@@ -1760,7 +1925,7 @@
       frame_nr,
       mail, images, config, display,
-      std::move(hwdevice),
+      std::move(encoding_stream),
       ref->reinit_event, *ref->encoder_p, channel_data);
   }
@@ -1803,18 +1968,18 @@
 
   int
   validate_config(std::shared_ptr<platf::display_t> &disp, const encoder_t &encoder, const config_t &config) {
-    reset_display(disp, encoder.base_dev_type, config::video.output_name, config);
+    reset_display(disp, encoder.details->dev_type, config::video.output_name, config);
     if (!disp) {
       return -1;
     }
 
-    auto pix_fmt = config.dynamicRange == 0 ? map_pix_fmt(encoder.static_pix_fmt) : map_pix_fmt(encoder.dynamic_pix_fmt);
-    auto hwdevice = disp->make_hwdevice(pix_fmt);
-    if (!hwdevice) {
+    auto pix_fmt = config.dynamicRange == 0 ? encoder.details->static_pix_fmt : encoder.details->dynamic_pix_fmt;
+    auto initial_encoding_stream = make_encoding_stream(*disp, encoder, config, pix_fmt);
+    if (!initial_encoding_stream) {
      return -1;
    }
 
-    auto session = make_session(disp.get(), encoder, config, disp->width, disp->height, std::move(hwdevice));
+    auto session = make_session(disp.get(), encoder, config, disp->width, disp->height, std::move(initial_encoding_stream));
     if (!session) {
       return -1;
     }
@@ -1822,32 +1987,35 @@
     {
       // Image buffers are large, so we use a separate scope to free it immediately after convert()
       auto img = disp->alloc_img();
-      if (!img || disp->dummy_img(img.get()) || session->device->convert(*img)) {
+      if (!img || disp->dummy_img(img.get()) || session->encoding_stream->convert(*img)) {
         return -1;
       }
     }
 
-    auto frame = session->device->frame;
-
-    frame->pict_type = AV_PICTURE_TYPE_I;
+    session->encoding_stream->request_idr_frame();
 
     auto packets = mail::man->queue(mail::video_packets);
     while (!packets->peek()) {
-      if (encode(1, *session, frame, packets, nullptr, {})) {
+      if (encode(1, *session, packets, nullptr, {})) {
         return -1;
       }
     }
 
     auto packet = packets->pop();
-    auto av_packet = packet->av_packet;
-    if (!(av_packet->flags & AV_PKT_FLAG_KEY)) {
+    if (!packet->is_idr()) {
       BOOST_LOG(error) << "First packet type is not an IDR frame"sv;
       return -1;
     }
 
     int flag = 0;
 
-    if (cbs::validate_sps(&*av_packet, config.videoFormat ? AV_CODEC_ID_H265 : AV_CODEC_ID_H264)) {
+    if (auto packet_avcodec = dynamic_cast<packet_raw_avcodec *>(packet.get())) {
+      if (cbs::validate_sps(packet_avcodec->av_packet, config.videoFormat ? AV_CODEC_ID_H265 : AV_CODEC_ID_H264)) {
+        flag |= VUI_PARAMS;
+      }
+    }
+    else {
+      // Don't check it for non-avcodec encoders.
+      flag |= VUI_PARAMS;
+    }
 
@@ -1984,6 +2152,7 @@
     auto previous_encoder = chosen_encoder;
     chosen_encoder = nullptr;
     active_hevc_mode = config::video.hevc_mode;
+    last_encoder_probe_supported_invalidate_ref_frames = false;
 
     if (!config::video.encoder.empty()) {
       // If there is a specific encoder specified, use it if it passes validation
@@ -2003,6 +2172,8 @@
           active_hevc_mode = 0;
         }
 
+        last_encoder_probe_supported_invalidate_ref_frames = encoder->details->supports_invalidate_ref_frames;
+
         chosen_encoder = encoder;
         break;
       }
@@ -2101,39 +2272,16 @@
     return 0;
   }
 
-  int
-  hwframe_ctx(ctx_t &ctx, platf::hwdevice_t *hwdevice, buffer_t &hwdevice_ctx, AVPixelFormat format) {
-    buffer_t frame_ref { av_hwframe_ctx_alloc(hwdevice_ctx.get()) };
-
-    auto frame_ctx = (AVHWFramesContext *) frame_ref->data;
-    frame_ctx->format = ctx->pix_fmt;
-    frame_ctx->sw_format = format;
-    frame_ctx->height = ctx->height;
-    frame_ctx->width = ctx->width;
-    frame_ctx->initial_pool_size = 0;
-
-    // Allow the hwdevice to modify hwframe context parameters
-    hwdevice->init_hwframes(frame_ctx);
-
-    if (auto err = av_hwframe_ctx_init(frame_ref.get()); err < 0) {
-      return err;
-    }
-
-    ctx->hw_frames_ctx = av_buffer_ref(frame_ref.get());
-
-    return 0;
-  }
-
   // Linux only declaration
-  typedef int (*vaapi_make_hwdevice_ctx_fn)(platf::hwdevice_t *base, AVBufferRef **hw_device_buf);
+  typedef int (*vaapi_init_avcodec_hardware_input_buffer_fn)(platf::avcodec_encoding_stream_t *stream, AVBufferRef **hw_device_buf);
 
-  util::Either<buffer_t, int>
-  vaapi_make_hwdevice_ctx(platf::hwdevice_t *base) {
-    buffer_t hw_device_buf;
+  util::Either<avcodec_buffer_t, int>
+  vaapi_init_avcodec_hardware_input_buffer(platf::avcodec_encoding_stream_t *stream) {
+    avcodec_buffer_t hw_device_buf;
 
     // If an egl hwdevice
-    if (base->data) {
-      if (((vaapi_make_hwdevice_ctx_fn) base->data)(base, &hw_device_buf)) {
+    if (stream->data) {
+      if (((vaapi_init_avcodec_hardware_input_buffer_fn) stream->data)(stream, &hw_device_buf)) {
         return -1;
       }
 
@@ -2152,9 +2300,9 @@
     return hw_device_buf;
   }
 
-  util::Either<buffer_t, int>
-  cuda_make_hwdevice_ctx(platf::hwdevice_t *base) {
-    buffer_t hw_device_buf;
+  util::Either<avcodec_buffer_t, int>
+  cuda_init_avcodec_hardware_input_buffer(platf::avcodec_encoding_stream_t *stream) {
+    avcodec_buffer_t hw_device_buf;
 
     auto status = av_hwdevice_ctx_create(&hw_device_buf, AV_HWDEVICE_TYPE_CUDA, nullptr, nullptr, 1 /* AV_CUDA_USE_PRIMARY_CONTEXT */);
     if (status < 0) {
@@ -2173,14 +2321,14 @@ void do_nothing(void *) {}
 
 namespace video {
-  util::Either<buffer_t, int>
-  dxgi_make_hwdevice_ctx(platf::hwdevice_t *hwdevice_ctx) {
-    buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
+  util::Either<avcodec_buffer_t, int>
+  dxgi_init_avcodec_hardware_input_buffer(platf::avcodec_encoding_stream_t *stream) {
+    avcodec_buffer_t ctx_buf { av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA) };
     auto ctx = (AVD3D11VADeviceContext *) ((AVHWDeviceContext *) ctx_buf->data)->hwctx;
     std::fill_n((std::uint8_t *) ctx, sizeof(AVD3D11VADeviceContext), 0);
 
-    auto device = (ID3D11Device *) hwdevice_ctx->data;
+    auto device = (ID3D11Device *) stream->data;
 
     device->AddRef();
     ctx->device = device;
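With packets polymorphic, consumers downstream of the encode threads can stay codec-agnostic: both the avcodec and NvENC paths surface their output through the `packet_raw_t` interface declared in `src/video.h` below. A sketch of such a consumer (`send_to_client()` is a placeholder):

  void send_to_client(uint8_t *data, size_t size);  // placeholder transport

  // Works for both packet_raw_avcodec and packet_raw_generic.
  void forward_packet(video::packet_t packet) {
    if (packet->is_idr()) {
      BOOST_LOG(debug) << "IDR frame " << packet->frame_index();
    }
    send_to_client(packet->data(), packet->data_size());
  }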
diff --git a/src/video.h b/src/video.h
index d906a2f93b5..80c9cd364dd 100644
--- a/src/video.h
+++ b/src/video.h
@@ -16,25 +16,19 @@ struct AVPacket;
 namespace video {
 
   struct packet_raw_t {
-    void
-    init_packet() {
-      this->av_packet = av_packet_alloc();
-    }
+    virtual ~packet_raw_t() = default;
 
-    template <class P>
-    explicit packet_raw_t(P *user_data):
-      channel_data { user_data } {
-      init_packet();
-    }
+    virtual bool
+    is_idr() = 0;
 
-    explicit packet_raw_t(std::nullptr_t):
-      channel_data { nullptr } {
-      init_packet();
-    }
+    virtual int64_t
+    frame_index() = 0;
 
-    ~packet_raw_t() {
-      av_packet_free(&this->av_packet);
-    }
+    virtual uint8_t *
+    data() = 0;
+
+    virtual size_t
+    data_size() = 0;
 
     struct replace_t {
       std::string_view old;
@@ -46,11 +40,72 @@ namespace video {
         old { std::move(old) },
         _new { std::move(_new) } {}
     };
 
+    std::vector<replace_t> *replacements = nullptr;
+    void *channel_data = nullptr;
+    bool after_ref_frame_invalidation = false;
+    std::optional<std::chrono::steady_clock::time_point> frame_timestamp;
+  };
+
+  struct packet_raw_avcodec: packet_raw_t {
+    packet_raw_avcodec() {
+      av_packet = av_packet_alloc();
+    }
+
+    ~packet_raw_avcodec() {
+      av_packet_free(&this->av_packet);
+    }
+
+    bool
+    is_idr() override {
+      return av_packet->flags & AV_PKT_FLAG_KEY;
+    }
+
+    int64_t
+    frame_index() override {
+      return av_packet->pts;
+    }
+
+    uint8_t *
+    data() override {
+      return av_packet->data;
+    }
+
+    size_t
+    data_size() override {
+      return av_packet->size;
+    }
+
     AVPacket *av_packet;
-    std::vector<replace_t> *replacements;
-    void *channel_data;
+  };
 
-    std::optional<std::chrono::steady_clock::time_point> frame_timestamp;
+  struct packet_raw_generic: packet_raw_t {
+    packet_raw_generic(std::vector<uint8_t> &&frame_data, int64_t frame_index, bool idr):
+      frame_data { std::move(frame_data) }, index { frame_index }, idr { idr } {
+    }
+
+    bool
+    is_idr() override {
+      return idr;
+    }
+
+    int64_t
+    frame_index() override {
+      return index;
+    }
+
+    uint8_t *
+    data() override {
+      return frame_data.data();
+    }
+
+    size_t
+    data_size() override {
+      return frame_data.size();
+    }
+
+    std::vector<uint8_t> frame_data;
+    int64_t index;
+    bool idr;
   };
 
   using packet_t = std::unique_ptr<packet_raw_t>;
@@ -67,16 +122,22 @@ namespace video {
 
   using hdr_info_t = std::unique_ptr<hdr_info_raw_t>;
 
+  /* Encoding configuration requested by remote client */
   struct config_t {
-    int width;
-    int height;
-    int framerate;
-    int bitrate;
-    int slicesPerFrame;
-    int numRefFrames;
+    int width;  // video width in pixels
+    int height;  // video height in pixels
+    int framerate;  // requested framerate, used in individual frame bitrate budget calculation
+    int bitrate;  // video bitrate in kilobits (1000 bits) for requested framerate
+    int slicesPerFrame;  // number of slices per frame
+    int numRefFrames;  // max number of reference frames
+
+    /* requested color range and SDR colorspace, HDR colorspace is always BT.2020.
+       color range (encoderCscMode & 0x1) : 0 - limited, 1 - full
+       SDR colorspace (encoderCscMode >> 1) : 0 - BT.601, 1 - BT.709, 2 - BT.2020 */
     int encoderCscMode;
 
-    int videoFormat;
-    int dynamicRange;
+    int videoFormat;  // 0 - h264, 1 - HEVC
+    int dynamicRange;  // 0 - SDR, 1 - HDR
   };
 
   using float4 = float[4];
@@ -94,6 +155,7 @@ namespace video {
   extern color_t colors[6];
 
   extern int active_hevc_mode;
+  extern bool last_encoder_probe_supported_invalidate_ref_frames;
 
   void
   capture(