diff --git a/worker/include/RTC/Codecs/PayloadDescriptorHandler.hpp b/worker/include/RTC/Codecs/PayloadDescriptorHandler.hpp
index d2f2846b02..b1ee2733d7 100644
--- a/worker/include/RTC/Codecs/PayloadDescriptorHandler.hpp
+++ b/worker/include/RTC/Codecs/PayloadDescriptorHandler.hpp
@@ -2,6 +2,8 @@
 #define MS_RTC_CODECS_PAYLOAD_DESCRIPTOR_HANDLER_HPP
 
 #include "common.hpp"
+#include "RTC/SeqManager.hpp"
+#include <deque>
 
 namespace RTC
 {
@@ -14,6 +16,61 @@ namespace RTC
 			virtual void Dump() const = 0;
 		};
 
+		// Ordered record of the picture IDs at which a layer value was pushed.
+		// Push() appends a (pictureId, layer) entry after evicting stale leading
+		// entries; GetLayer() looks a picture ID up against the stored entries.
+		class PictureIdList
+		{
+			// NOTE(review): <uint16_t, 15> restored after extraction dropped it; confirm.
+			using PictureIdManager = RTC::SeqManager<uint16_t, 15>;
+
+			// Leading entries further than this behind a pushed picture ID are evicted.
+			static constexpr uint16_t MaxCurrentLayerPictureIdNum{ 1000u };
+
+		public:
+			// Appends { pictureId, layer }. Before appending, drops entries from the
+			// front while their distance to pictureId exceeds the maximum above.
+			void Push(uint16_t pictureId, int16_t layer)
+			{
+				// Always test the current front. Popping from inside a range-for over
+				// the deque would invalidate the iterator the loop then advances.
+				while (!this->list.empty())
+				{
+					const uint16_t oldest = this->list.front().first;
+					// Layers can be changed only with ordered pictureId values, so a
+					// pictureId not greater than the stored one is taken as rolled over.
+					const uint16_t diff = pictureId > oldest
+					                        ? pictureId - oldest
+					                        : pictureId + PictureIdManager::MaxValue - oldest;
+
+					if (diff <= MaxCurrentLayerPictureIdNum)
+					{
+						break;
+					}
+					this->list.pop_front();
+				}
+				this->list.emplace_back(pictureId, layer);
+			}
+
+			// Scans from the second entry on. At the first entry whose picture ID is
+			// higher than pictureId, returns the previous entry's layer; else -1.
+			int16_t GetLayer(uint16_t pictureId) const
+			{
+				for (size_t idx{ 1 }; idx < this->list.size(); ++idx)
+				{
+					if (PictureIdManager::IsSeqHigherThan(this->list[idx].first, pictureId))
+					{
+						return this->list[idx - 1].second;
+					}
+				}
+
+				return -1;
+			}
+
+		private:
+			std::deque<std::pair<uint16_t, int16_t>> list;
+		};
+
 		// Encoding context used by PayloadDescriptorHandler to properly rewrite the
 		// PayloadDescriptor.
class EncodingContext @@ -87,6 +144,49 @@ namespace RTC } virtual void SyncRequired() = 0; + void SetCurrentSpatialLayer(int16_t spatialLayer, uint16_t pictureId) + { + if (this->currentSpatialLayer == spatialLayer) + { + return; + } + + this->spatialLayerPictureIdList.Push(pictureId, spatialLayer); + + this->currentSpatialLayer = spatialLayer; + } + void SetCurrentTemporalLayer(int16_t temporalLayer, uint16_t pictureId) + { + if (this->currentTemporalLayer == temporalLayer) + { + return; + } + + this->temporalLayerPictureIdList.Push(pictureId, temporalLayer); + + this->currentTemporalLayer = temporalLayer; + } + int16_t GetCurrentSpatialLayer(uint16_t pictureId) const + { + int16_t layer = this->spatialLayerPictureIdList.GetLayer(pictureId); + if (layer > -1) + { + return layer; + } + + return this->currentSpatialLayer; + } + int16_t GetCurrentTemporalLayer(uint16_t pictureId) const + { + int16_t layer = this->temporalLayerPictureIdList.GetLayer(pictureId); + if (layer > -1) + { + return layer; + } + + return this->currentTemporalLayer; + } + private: Params params; int16_t targetSpatialLayer{ -1 }; @@ -94,6 +194,10 @@ namespace RTC int16_t currentSpatialLayer{ -1 }; int16_t currentTemporalLayer{ -1 }; bool ignoreDtx{ false }; + + private: + PictureIdList spatialLayerPictureIdList; + PictureIdList temporalLayerPictureIdList; }; class PayloadDescriptorHandler diff --git a/worker/src/RTC/Codecs/VP9.cpp b/worker/src/RTC/Codecs/VP9.cpp index dea55d37c3..5fdde5b211 100644 --- a/worker/src/RTC/Codecs/VP9.cpp +++ b/worker/src/RTC/Codecs/VP9.cpp @@ -242,78 +242,82 @@ namespace RTC // clang-format on // Upgrade current spatial layer if needed. 
- if (context->GetTargetSpatialLayer() > context->GetCurrentSpatialLayer()) - { - if (this->payloadDescriptor->isKeyFrame) - { - MS_DEBUG_DEV( - "upgrading tmpSpatialLayer from %" PRIu16 " to %" PRIu16 " (packet:%" PRIu8 ":%" PRIu8 - ")", - context->GetCurrentSpatialLayer(), - context->GetTargetSpatialLayer(), - packetSpatialLayer, - packetTemporalLayer); - - tmpSpatialLayer = context->GetTargetSpatialLayer(); - tmpTemporalLayer = 0; // Just in case. - } - } - // Downgrade current spatial layer if needed. - else if (context->GetTargetSpatialLayer() < context->GetCurrentSpatialLayer()) + if (!isOldPacket) { - // In K-SVC we must wait for a keyframe. - if (context->IsKSvc()) + if (context->GetTargetSpatialLayer() > context->GetCurrentSpatialLayer()) { if (this->payloadDescriptor->isKeyFrame) - // clang-format on { MS_DEBUG_DEV( - "downgrading tmpSpatialLayer from %" PRIu16 " to %" PRIu16 " (packet:%" PRIu8 - ":%" PRIu8 ") after keyframe (K-SVC)", + "upgrading tmpSpatialLayer from %" PRIu16 " to %" PRIu16 " (packet:%" PRIu8 ":%" PRIu8 + ") old:%d", context->GetCurrentSpatialLayer(), context->GetTargetSpatialLayer(), packetSpatialLayer, - packetTemporalLayer); + packetTemporalLayer, + isOldPacket); tmpSpatialLayer = context->GetTargetSpatialLayer(); tmpTemporalLayer = 0; // Just in case. } } - // In full SVC we do not need a keyframe. - else + // Downgrade current spatial layer if needed. + else if (context->GetTargetSpatialLayer() < context->GetCurrentSpatialLayer()) { - // clang-format off - if ( - packetSpatialLayer == context->GetTargetSpatialLayer() && - this->payloadDescriptor->e - ) - // clang-format on + // In K-SVC we must wait for a keyframe. 
+ if (context->IsKSvc()) { - MS_DEBUG_DEV( - "downgrading tmpSpatialLayer from %" PRIu16 " to %" PRIu16 " (packet:%" PRIu8 - ":%" PRIu8 ") without keyframe (full SVC)", - context->GetCurrentSpatialLayer(), - context->GetTargetSpatialLayer(), - packetSpatialLayer, - packetTemporalLayer); - - tmpSpatialLayer = context->GetTargetSpatialLayer(); - tmpTemporalLayer = 0; // Just in case. + if (this->payloadDescriptor->isKeyFrame) + // clang-format on + { + MS_DEBUG_DEV( + "downgrading tmpSpatialLayer from %" PRIu16 " to %" PRIu16 " (packet:%" PRIu8 + ":%" PRIu8 ") after keyframe (K-SVC)", + context->GetCurrentSpatialLayer(), + context->GetTargetSpatialLayer(), + packetSpatialLayer, + packetTemporalLayer); + + tmpSpatialLayer = context->GetTargetSpatialLayer(); + tmpTemporalLayer = 0; // Just in case. + } + } + // In full SVC we do not need a keyframe. + else + { + // clang-format off + if ( + packetSpatialLayer == context->GetTargetSpatialLayer() && + this->payloadDescriptor->e + ) + // clang-format on + { + MS_DEBUG_DEV( + "downgrading tmpSpatialLayer from %" PRIu16 " to %" PRIu16 " (packet:%" PRIu8 + ":%" PRIu8 ") without keyframe (full SVC)", + context->GetCurrentSpatialLayer(), + context->GetTargetSpatialLayer(), + packetSpatialLayer, + packetTemporalLayer); + + tmpSpatialLayer = context->GetTargetSpatialLayer(); + tmpTemporalLayer = 0; // Just in case. + } } } } - // Unless old packet filter spatial layers that are either + // Filter spatial layers that are either // * higher than current one // * different than the current one when KSVC is enabled and this is not a keyframe // (interframe p bit = 1) + uint16_t tmpSpatialLayerCheck = + isOldPacket ? 
context->GetCurrentSpatialLayer(this->payloadDescriptor->pictureId) + : tmpSpatialLayer; // clang-format off if ( - !isOldPacket && - ( - packetSpatialLayer > tmpSpatialLayer || - (context->IsKSvc() && this->payloadDescriptor->p && packetSpatialLayer != tmpSpatialLayer) - ) + packetSpatialLayer > tmpSpatialLayerCheck || + (context->IsKSvc() && this->payloadDescriptor->p && packetSpatialLayer != tmpSpatialLayerCheck) ) // clang-format on { @@ -369,12 +373,15 @@ namespace RTC tmpTemporalLayer = context->GetTargetTemporalLayer(); } } + } - // Filter temporal layers higher than current one. - if (packetTemporalLayer > tmpTemporalLayer) - { - return false; - } + // Filter temporal layers higher than current one. + uint16_t tmpTemporalLayerCheck = + isOldPacket ? context->GetCurrentTemporalLayer(this->payloadDescriptor->pictureId) + : tmpTemporalLayer; + if (packetTemporalLayer > tmpTemporalLayerCheck) + { + return false; } // Set marker bit if needed. @@ -394,13 +401,13 @@ namespace RTC // Update current spatial layer if needed. if (tmpSpatialLayer != context->GetCurrentSpatialLayer()) { - context->SetCurrentSpatialLayer(tmpSpatialLayer); + context->SetCurrentSpatialLayer(tmpSpatialLayer, this->payloadDescriptor->pictureId); } // Update current temporal layer if needed. 
if (tmpTemporalLayer != context->GetCurrentTemporalLayer()) { - context->SetCurrentTemporalLayer(tmpTemporalLayer); + context->SetCurrentTemporalLayer(tmpTemporalLayer, this->payloadDescriptor->pictureId); } return true; diff --git a/worker/test/src/RTC/Codecs/TestVP9.cpp b/worker/test/src/RTC/Codecs/TestVP9.cpp index 77dec6365c..0f2388dd2a 100644 --- a/worker/test/src/RTC/Codecs/TestVP9.cpp +++ b/worker/test/src/RTC/Codecs/TestVP9.cpp @@ -10,11 +10,11 @@ constexpr uint16_t MaxPictureId = (1 << 15) - 1; Codecs::VP9::PayloadDescriptor* CreateVP9Packet( uint8_t* buffer, size_t bufferLen, uint16_t pictureId, uint8_t tlIndex) { - buffer[0] = 0xAD; // I and L bits + buffer[0] = 0xAD; // I, L, B, E bits uint16_t netPictureId = htons(pictureId); std::memcpy(buffer + 1, &netPictureId, 2); buffer[1] |= 0x80; - buffer[3] = tlIndex << 6; + buffer[3] = (tlIndex << 5) | (1 << 4); // tlIndex, switchingUpPoint auto* payloadDescriptor = Codecs::VP9::Parse(buffer, bufferLen); @@ -75,4 +75,110 @@ SCENARIO("process VP9 payload descriptor", "[codecs][vp9]") forwarded = ProcessVP9Packet(context, 1, 1); REQUIRE_FALSE(forwarded); } + + SECTION("test PayloadDescriptorHandler") + { + RTC::Codecs::EncodingContext::Params params; + params.spatialLayers = 1; + params.temporalLayers = 3; + + Codecs::VP9::EncodingContext context(params); + + uint16_t start = MaxPictureId - 2000; + + context.SetCurrentTemporalLayer(0, start + 0); + context.SetCurrentTemporalLayer(1, start + 1); + context.SetCurrentTemporalLayer(2, start + 5); + context.SetCurrentTemporalLayer(0, start + 6); + + REQUIRE(context.GetCurrentTemporalLayer(start + 0) == 0); + REQUIRE(context.GetCurrentTemporalLayer(start + 1) == 1); + REQUIRE(context.GetCurrentTemporalLayer(start + 2) == 1); + REQUIRE(context.GetCurrentTemporalLayer(start + 5) == 2); + REQUIRE(context.GetCurrentTemporalLayer(start + 6) == 0); + + context.SetCurrentTemporalLayer(1, start + 1000); + context.SetCurrentTemporalLayer(2, start + 1001); // This will drop the 
first item. + + REQUIRE(context.GetCurrentTemporalLayer(start + 1000) == 1); + REQUIRE(context.GetCurrentTemporalLayer(start + 0) == 1); // It will get the item at start+1. + + context.SetCurrentTemporalLayer(0, 0); // This will drop items from start to start+999. + + REQUIRE(context.GetCurrentTemporalLayer(0) == 0); + REQUIRE(context.GetCurrentTemporalLayer(start + 0) == 1); // It will get the item at start+1000. + } + + SECTION("drop packets that belong to other temporal layers with unordered pictureID") + { + RTC::Codecs::EncodingContext::Params params; + params.spatialLayers = 1; + params.temporalLayers = 3; + + Codecs::VP9::EncodingContext context(params); + context.SyncRequired(); + context.SetCurrentSpatialLayer(0, 0); + context.SetTargetSpatialLayer(0); + + uint16_t start = MaxPictureId - 20; + std::vector> packets = { + // targetTemporalLayer=0 + { start, 0, 0, true }, + { start, 1, -1, false }, + { start + 1, 0, -1, true }, + { start + 1, 1, -1, false }, + { start + 2, 0, -1, true }, + { start + 2, 1, -1, false }, + // targetTemporalLayer=1 + { start + 10, 0, 1, true }, + { start + 10, 1, -1, true }, + { start + 11, 0, -1, true }, + { start + 11, 1, -1, true }, + + { start + 3, 0, -1, true }, // old packet + { start + 3, 1, -1, false }, + { start + 12, 0, -1, true }, + { start + 12, 1, -1, true }, + // targetTemporalLayer=0 + { start + 14, 0, 0, true }, + { start + 14, 1, -1, false }, + { start + 13, 0, -1, true }, // old packet + { start + 13, 1, -1, true }, + // targetTemporalLayer=1 + { start + 15, 0, 1, true }, + { start + 15, 1, -1, true }, + // targetTemporalLayer=0 + { 0, 0, 0, true }, + { 0, 1, -1, false }, + { 1, 0, -1, true }, + { 1, 1, -1, false }, + { start + 16, 0, -1, true }, // old packet + { start + 16, 1, -1, true }, + }; + + for (const auto& packet : packets) + { + auto pictureId = std::get<0>(packet); + auto tlIndex = std::get<1>(packet); + auto targetTemporalLayer = std::get<2>(packet); + auto shouldForward = std::get<3>(packet); + + 
if (targetTemporalLayer >= 0) + { + context.SetTargetTemporalLayer(targetTemporalLayer); + } + + auto forwarded = ProcessVP9Packet(context, pictureId, tlIndex); + + if (shouldForward) + { + REQUIRE(forwarded); + REQUIRE(forwarded->pictureId == pictureId); + } + else + { + REQUIRE_FALSE(forwarded); + } + } + } }