/* -----------------------------------------------------------------------------
The copyright in this software is being made available under the Clear BSD
License, included below. No patent rights, trademark rights and/or
other Intellectual Property Rights other than the copyrights concerning
the Software are granted under this license.
The Clear BSD License
Copyright (c) 2018-2024, Fraunhofer-Gesellschaft zur Förderung der angewandten Forschung e.V. & The VVdeC Authors.
All rights reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted (subject to the limitations in the disclaimer below) provided that
the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from this
software without specific prior written permission.
NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY
THIS LICENSE. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
------------------------------------------------------------------------------------------- */
/** \file IntraPrediction.cpp
\brief intra prediction class
*/
#define DONT_UNDEF_SIZE_AWARE_PER_EL_OP
#include "IntraPrediction.h"
#include "Unit.h"
#include "UnitTools.h"
#include "Buffer.h"
#include "dtrace_next.h"
#include "Rom.h"
#include <memory.h>
#include <array>
#include "CommonLib/InterpolationFilter.h"
#include "CommonLib/TimeProfiler.h"
namespace vvdec
{
// ====================================================================================================================
// Tables
// ====================================================================================================================
// Thresholds used by xPredIntraAng() to decide whether its filterFlag is set for a block.
// First index: channel type (first row luma, second row chroma).
// Second index: the averaged log2 block size, ( log2( width ) + log2( height ) ) >> 1, as computed in xPredIntraAng().
// The flag is set when the distance of the prediction mode to the nearer of HOR_IDX / VER_IDX is strictly
// greater than the stored value. The per-entry size labels below are kept from the original table.
const uint8_t IntraPrediction::m_aucIntraFilter[MAX_NUM_CHANNEL_TYPE][MAX_INTRA_FILTER_DEPTHS] =
{
{ // Luma
24, // 1xn
24, // 2xn
24, // 4xn
14, // 8xn
2, // 16xn
0, // 32xn
0, // 64xn
0, // 128xn
},
{ // Chroma
40, // 1xn
40, // 2xn
40, // 4xn
28, // 8xn
4, // 16xn
0, // 32xn
0, // 64xn
0, // 128xn
}
};
// 4-tap smoothing interpolation filter for the luma angular prediction path.
// One row per 1/32-sample fractional position (row index = deltaPos & 31 in IntraPredAngleCore()).
// The four taps of every row sum to 64, which matches the "+ 32 ) >> 6" normalisation applied there.
// xPredIntraAng() selects this table when the cubic (chroma) filter table is not used.
const TFilterCoeff g_intraGaussFilter[32][4] = {
{ 16, 32, 16, 0 },
{ 16, 32, 16, 0 },
{ 15, 31, 17, 1 },
{ 15, 31, 17, 1 },
{ 14, 30, 18, 2 },
{ 14, 30, 18, 2 },
{ 13, 29, 19, 3 },
{ 13, 29, 19, 3 },
{ 12, 28, 20, 4 },
{ 12, 28, 20, 4 },
{ 11, 27, 21, 5 },
{ 11, 27, 21, 5 },
{ 10, 26, 22, 6 },
{ 10, 26, 22, 6 },
{ 9, 25, 23, 7 },
{ 9, 25, 23, 7 },
{ 8, 24, 24, 8 },
{ 8, 24, 24, 8 },
{ 7, 23, 25, 9 },
{ 7, 23, 25, 9 },
{ 6, 22, 26, 10 },
{ 6, 22, 26, 10 },
{ 5, 21, 27, 11 },
{ 5, 21, 27, 11 },
{ 4, 20, 28, 12 },
{ 4, 20, 28, 12 },
{ 3, 19, 29, 13 },
{ 3, 19, 29, 13 },
{ 2, 18, 30, 14 },
{ 2, 18, 30, 14 },
{ 1, 17, 31, 15 },
{ 1, 17, 31, 15 },
};
void GetLumaRecPixel420Core (const int width,const int height, const Pel* pRecSrc0,const ptrdiff_t iRecStride,Pel* pDst0,const ptrdiff_t iDstStride)
{
for( int y = 0; y < height; y++ )
{
for( int x = 0; x < width; x ++ )
{
pDst0[x + 0 ] = ( pRecSrc0[( (x + 0 ) << 1 ) ] * 2
+ pRecSrc0[( (x + 0 ) << 1 ) + 1] * 1
+ pRecSrc0[( (x + 0 ) << 1 ) - 1] * 1
+ pRecSrc0[( (x + 0 ) << 1 ) + iRecStride] * 2
+ pRecSrc0[( (x + 0 ) << 1 ) + 1 + iRecStride] * 1
+ pRecSrc0[( (x + 0 ) << 1 ) - 1 + iRecStride] * 1
+ 4 ) >> 3;
}
pDst0 += iDstStride;
pRecSrc0 += (iRecStride<<1);
}
}
/** Function for deriving planar intra prediction. This function derives the prediction samples for planar mode (intra coding).
*/
//NOTE: Bit-Limit - 24-bit source
void xPredIntraPlanarCore( const CPelBuf &pSrc, PelBuf &pDst, const SPS& sps )
{
// with some optimizations gcc gives spurious "-Wmaybe-uninitialized" warnings here
GCC_WARNING_DISABLE_maybe_uninitialized
const uint32_t width = pDst.width;
const uint32_t height = pDst.height;
const uint32_t log2W = getLog2( width );
const uint32_t log2H = getLog2( height );
int leftColumn[MAX_CU_SIZE + 1], topRow[MAX_CU_SIZE + 1], bottomRow[MAX_CU_SIZE], rightColumn[MAX_CU_SIZE];
const uint32_t offset = 1 << (log2W + log2H);
// Get left and above reference column and row
for( int k = 0; k < width + 1; k++ )
{
topRow[k] = pSrc.at( k + 1, 0 );
}
for( int k = 0; k < height + 1; k++ )
{
leftColumn[k] = pSrc.at( 0, k + 1 );
}
// Prepare intermediate variables used in interpolation
int bottomLeft = leftColumn[height];
int topRight = topRow[width];
for( int k = 0; k < width; k++ )
{
bottomRow[k] = bottomLeft - topRow[k];
topRow[k] = topRow[k] << log2H;
}
for( int k = 0; k < height; k++ )
{
rightColumn[k] = topRight - leftColumn[k];
leftColumn[k] = leftColumn[k] << log2W;
}
const uint32_t finalShift = 1 + log2W + log2H;
const ptrdiff_t stride = pDst.stride;
Pel* pred = pDst.buf;
for( int y = 0; y < height; y++, pred += stride )
{
int horPred = leftColumn[y];
for( int x = 0; x < width; x++ )
{
horPred += rightColumn[y];
topRow[x] += bottomRow[x];
int vertPred = topRow[x];
pred[x] = ( ( horPred << log2H ) + ( vertPred << log2W ) + offset ) >> finalShift;
}
}
GCC_WARNING_RESET
}
/** Position dependent sample filtering (PDPC) of a PLANAR or DC predicted block, scalar reference implementation.
 *
 * Every predicted sample is pulled towards the left reference sample of its row and the top reference sample
 * of its column. The weights start at 32/64 on the block edge and are halved with growing distance; how fast
 * they decay is controlled by a scale derived from the block dimensions.
 * For any mode other than PLANAR_IDX / DC_IDX the block is left untouched.
 *
 * \param ptrSrc     reference samples: the top reference is read at ( x + 1, 0 ), the left one at ( 0, y + 1 )
 * \param srcStride  stride of the reference sample buffer
 * \param piPred     prediction block, filtered in place
 * \param uiDirMode  intra prediction mode of the block
 * \param clpRng     not referenced by this implementation
 */
void IntraPredSampleFilterCore(Pel *ptrSrc,const ptrdiff_t srcStride,PelBuf &piPred,const uint32_t uiDirMode,const ClpRng& clpRng)
{
  const CPelBuf srcBuf ( ptrSrc, ( SizeType ) srcStride, ( SizeType ) srcStride );
  const int     iWidth  = piPred.width;
  const int     iHeight = piPred.height;
  PelBuf        dstBuf  = piPred;

  const int scale = ((getLog2(iWidth) - 2 + getLog2(iHeight) - 2 + 2) >> 2);
  CHECK_RECOVERABLE(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");

  if( uiDirMode != PLANAR_IDX && uiDirMode != DC_IDX )
  {
    return;
  }

  for( int y = 0; y < iHeight; y++ )
  {
    // weight of the top reference depends on the row only; the shift is capped to keep it defined
    const int wT   = 32 >> std::min(31, ((y << 1) >> scale));
    const Pel left = srcBuf.at(0, y + 1);

    for( int x = 0; x < iWidth; x++ )
    {
      const int wL  = 32 >> std::min(31, ((x << 1) >> scale));
      const Pel top = srcBuf.at(x + 1, 0);
      const Pel val = dstBuf.at(x, y);

      dstBuf.at(x, y) = val + ((wL * (left - val) + wT * (top - val) + 32) >> 6);
    }
  }
}
template<typename T>
void IntraPredAngleCore(T* pDstBuf,const ptrdiff_t dstStride,T* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff,const bool useCubicFilter,const ClpRng& clpRng)
{
for (int y = 0; y<height; y++ )
{
const int deltaInt = deltaPos >> 5;
const int deltaFract = deltaPos & ( 32 - 1 );
Pel p[4];
int refMainIndex = deltaInt + 1;
const TFilterCoeff *f = &ff[deltaFract << 2];
for( int x = 0; x < width; x++, refMainIndex++ )
{
p[0] = refMain[refMainIndex - 1];
p[1] = refMain[refMainIndex ];
p[2] = refMain[refMainIndex + 1];
p[3] = refMain[refMainIndex + 2];
pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(f[0] * p[0]) + static_cast<int>(f[1] * p[1]) + static_cast<int>(f[2] * p[2]) + static_cast<int>(f[3] * p[3]) + 32) >> 6);
if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
{
pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng );
}
}
deltaPos += intraPredAngle;
}
}
template<typename T>
void IntraPredAngleChroma(T* pDstBuf,const ptrdiff_t dstStride,int16_t* pBorder,int width,int height,int deltaPos,int intraPredAngle)
{
for (int y = 0; y<height; y++)
{
const int deltaInt = deltaPos >> 5;
const int deltaFract = deltaPos & (32 - 1);
// Do linear filtering
const Pel *pRM = pBorder + deltaInt + 1;
int lastRefMainPel = *pRM++;
for( int x = 0; x < width; pRM++, x++ )
{
int thisRefMainPel = *pRM;
pDstBuf[x + 0] = ( Pel ) ( ( ( 32 - deltaFract )*lastRefMainPel + deltaFract*thisRefMainPel + 16 ) >> 5 );
lastRefMainPel = thisRefMainPel;
}
deltaPos += intraPredAngle;
pDstBuf += dstStride;
}
}
// ====================================================================================================================
// Constructor / destructor / initialize
// ====================================================================================================================
// Sets the chroma format to the "not initialised" marker NUM_CHROMA_FORMAT and installs the scalar
// reference kernels defined above in every kernel function pointer.
// NOTE(review): init() calls initIntraPredictionX86() when SIMD is enabled, which presumably replaces
// some of these pointers - confirm there.
IntraPrediction::IntraPrediction() : m_currChromaFormat( NUM_CHROMA_FORMAT )
{
  // block level kernels
  GetLumaRecPixel420      = GetLumaRecPixel420Core;
  xPredIntraPlanar        = xPredIntraPlanarCore;

  // PDPC for PLANAR / DC, one pointer per width class, same scalar kernel
  IntraPredSampleFilter8  = IntraPredSampleFilterCore;
  IntraPredSampleFilter16 = IntraPredSampleFilterCore;

  // angular prediction, 2-tap path
  IntraPredAngleChroma4   = IntraPredAngleChroma;
  IntraPredAngleChroma8   = IntraPredAngleChroma;

  // angular prediction, 4-tap path
  IntraPredAngleCore4     = IntraPredAngleCore;
  IntraPredAngleCore8     = IntraPredAngleCore;
}
// Destructor: delegates all clean-up to destroy().
IntraPrediction::~IntraPrediction()
{
destroy();
}
// Release hook called from the destructor and from init() when the chroma format changes.
// The body is empty: nothing is released here.
void IntraPrediction::destroy()
{
}
/** (Re-)initialises the predictor for a chroma format.
 *
 * \param chromaFormatIDC  chroma format to work with; a change with respect to the stored format triggers destroy()
 * \param bitDepthY        not referenced by this implementation
 */
void IntraPrediction::init(ChromaFormat chromaFormatIDC, const unsigned bitDepthY)
{
  // if it has been initialised before, but the chroma format has changed, release the memory and start again.
  const bool formatChanged = ( m_currChromaFormat != chromaFormatIDC );
  if( formatChanged )
  {
    destroy();
  }
  m_currChromaFormat = chromaFormatIDC;

  // reset the per-component bookkeeping
  for( int i = 0; i < 3; ++i )
  {
    m_neighborSize[i] = 0;
  }
  m_lastCUidx = -1;

#if ENABLE_SIMD_OPT_INTRAPRED && defined( TARGET_SIMD_X86 )
  initIntraPredictionX86();
#endif
}
// ====================================================================================================================
// Public member functions
// ====================================================================================================================
// Computes the DC value of the reference samples used in intra prediction.
// Only the longer side of the block contributes (both sides for square blocks), so the divisor is always a
// power of two and the division is a rounded right shift.
//   pSrc    : reference samples, top reference in row 0, left reference in column 0
//   dstSize : block dimensions, must not be empty
//   mrlIdx  : offset added to the index of the first reference sample that is read
//NOTE: Bit-Limit - 25-bit source
Pel IntraPrediction::xGetPredValDc( const CPelBuf &pSrc, const Size &dstSize, const int mrlIdx )
{
  CHECK_RECOVERABLE( dstSize.width == 0 || dstSize.height == 0, "Empty area provided" );

  const int blkW     = dstSize.width;
  const int blkH     = dstSize.height;
  const int firstRef = mrlIdx + 1;

  int accum = 0;

  // top reference row: used unless the block is taller than wide
  if( blkW >= blkH )
  {
    for( int i = 0; i < blkW; ++i )
    {
      accum += pSrc.at( firstRef + i, 0 );
    }
  }

  // left reference column: used unless the block is wider than tall
  if( blkH >= blkW )
  {
    for( int i = 0; i < blkH; ++i )
    {
      accum += pSrc.at( 0, firstRef + i );
    }
  }

  const auto numSamples = ( blkW == blkH ) ? ( blkW << 1 ) : std::max( blkW, blkH );
  const auto shift      = getLog2( numSamples );
  const auto rounding   = ( numSamples >> 1 );

  return static_cast<Pel>( ( accum + rounding ) >> shift );
}
// Remaps a regular angular mode to its wide-angle replacement for non-square blocks.
// Modes outside ( DC_IDX, VDIA_IDX ] and all modes of square blocks are returned unchanged.
// For wide blocks the lowest modes are moved beyond VDIA_IDX, for tall blocks the highest modes are moved
// below mode 2; how many modes are affected grows with the log2 aspect ratio.
int IntraPrediction::getWideAngle( int width, int height, int predMode )
{
  if( predMode <= DC_IDX || predMode > VDIA_IDX )
  {
    return predMode;
  }

  // number of remapped modes, indexed by | log2( width ) - log2( height ) |
  const int modeShift[] = { 0, 6, 10, 12, 14, 15 };
  const int deltaSize   = abs(getLog2(width) - getLog2(height));

  if( width > height )
  {
    if( predMode < 2 + modeShift[deltaSize] )
    {
      predMode += (VDIA_IDX - 1);
    }
  }
  else if( height > width )
  {
    if( predMode > VDIA_IDX - modeShift[deltaSize] )
    {
      predMode -= (VDIA_IDX - 1);
    }
  }

  return predMode;
}
// Sets the lengths of the top and left reference sample arrays to twice the block width / height.
void IntraPrediction::setReferenceArrayLengths( const CompArea &area )
{
  const int blkW = area.width;
  const int blkH = area.height;

  m_topRefLength  = blkW * 2;
  m_leftRefLength = blkH * 2;
}
/** Generates the intra prediction of one component block for every non-MIP mode.
 *
 * Dispatches to the planar, DC, BDPCM or angular predictor and afterwards applies the PDPC sample filter to
 * PLANAR / DC blocks. The reference samples must have been prepared before (initIntraPatternChType() or
 * initIntraPatternChTypeISP()).
 *
 * \param compId                  component to predict
 * \param piPred                  destination block; its dimensions define the predicted area
 * \param cu                      coding unit that carries the mode information
 * \param useFilteredPredSamples  selects the filtered instead of the unfiltered reference sample buffer
 */
void IntraPrediction::predIntraAng( const ComponentID compId, PelBuf &piPred, const CodingUnit &cu, const bool useFilteredPredSamples )
{
  const ComponentID compID      = MAP_CHROMA( compId );
  const ChannelType channelType = toChannelType( compID );
  const int         iWidth      = piPred.width;
  const int         iHeight     = piPred.height;
  const Size        cuSize      = Size( cu.blocks[compId].width, cu.blocks[compId].height );

  CHECK_RECOVERABLE( CU::isMIP(cu, toChannelType(compId)), "We should not get here for MIP." );
  const uint32_t uiDirMode = isLuma( compId ) && cu.bdpcmMode() ? BDPCM_IDX : !isLuma(compId) && cu.bdpcmModeChroma() ? BDPCM_IDX : PU::getFinalIntraMode(cu, channelType);
  CHECKD( iWidth == 2, "Width of 2 is not supported" );

  const int  multiRefIdx = ( compID == COMPONENT_Y ) ? cu.multiRefIdx() : 0;
  const bool useISP      = cu.ispMode() && isLuma( compID );
  const int  srcStride   = m_topRefLength  + 1 + multiRefIdx;
  const int  srcHStride  = m_leftRefLength + 1 + multiRefIdx;

  const ClpRng& clpRng ( cu.slice->clpRng( compID ) );

  // PDPC needs a minimum block size and is not combined with a non-zero reference line index.
  // xPredIntraAng() may additionally clear the flag.
  bool doPDPC = ( iWidth >= MIN_TB_SIZEY && iHeight >= MIN_TB_SIZEY ) && multiRefIdx == 0;

  // ISP luma blocks read from the ISP reference buffer, everything else from the regular predictor buffer
  const PelBuf& srcBuf = useISP ? getISPBuffer( useFilteredPredSamples ) : PelBuf(getPredictorPtr(compID, useFilteredPredSamples), srcStride, srcHStride);

  switch( uiDirMode )
  {
    case PLANAR_IDX:
      xPredIntraPlanar( srcBuf, piPred, *cu.sps );
      break;
    case DC_IDX:
      xPredIntraDc( srcBuf, piPred, channelType, false, multiRefIdx );
      break;
    case BDPCM_IDX:
      xPredIntraBDPCM( srcBuf, piPred, isLuma(compID) ? cu.bdpcmMode() : cu.bdpcmModeChroma(), clpRng );
      break;
    default:
      // every remaining mode is angular; wide-angle remapping is handled inside xPredIntraAng()
      xPredIntraAng( srcBuf, piPred, channelType, uiDirMode, clpRng, *cu.sps, multiRefIdx, useFilteredPredSamples, doPDPC, useISP, cuSize );
      break;
  }

  if( doPDPC && ( uiDirMode == PLANAR_IDX || uiDirMode == DC_IDX ) )
  {
    // two kernels exist, selected by block width
    if( iWidth > 8 )
    {
      IntraPredSampleFilter16( srcBuf.buf, srcBuf.stride, piPred, uiDirMode, clpRng );
    }
    else
    {
      IntraPredSampleFilter8( srcBuf.buf, srcBuf.stride, piPred, uiDirMode, clpRng );
    }
  }
}
void IntraPrediction::predIntraChromaLM( const ComponentID compID, PelBuf& piPred, const CodingUnit& cu, const CompArea& chromaArea, int intraDir )
{
int iLumaStride = 0;
PelBuf Temp;
if( (intraDir == MDLM_L_IDX) || (intraDir == MDLM_T_IDX) )
{
iLumaStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1;
Temp = PelBuf( m_piYuvExt[1] + iLumaStride + 1, iLumaStride, Size( chromaArea ) );
}
else
{
iLumaStride = MAX_TU_SIZE_FOR_PROFILE + 1;
Temp = PelBuf( m_piYuvExt[1] + iLumaStride + 1, iLumaStride, Size( chromaArea ) );
}
int a, b, iShift;
xGetLMParameters( cu, compID, chromaArea, a, b, iShift );
////// final prediction
piPred.copyFrom( Temp );
piPred.linearTransform( a, iShift, b, true, cu.slice->clpRng( compID ) );
}
void IntraPrediction::xPredIntraDc( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const bool enableBoundaryFilter, const int mrlIdx )
{
const Pel dcval = xGetPredValDc( pSrc, pDst, mrlIdx );
pDst.fill( dcval );
}
// Function for deriving the angular Intra predictions (non-template overload with an int stride).
// Per row, deltaPos is split into an integer offset (deltaPos >> 5) and a 1/32 fractional phase (deltaPos & 31).
// The phase selects four coefficients from ff which are applied to four neighbouring samples of refMain;
// the sum is rounded, normalised by 64 and, for the cubic filter, clipped to clpRng.
void IntraPredAngleCore(Pel *pDstBuf,const int dstStride,Pel* refMain,int width,int height,int deltaPos,int intraPredAngle,const TFilterCoeff *ff,const bool useCubicFilter,const ClpRng& clpRng)
{
  for( int row = 0; row < height; ++row, deltaPos += intraPredAngle )
  {
    const int           wholeOffs = deltaPos >> 5;
    const int           phase     = deltaPos & ( 32 - 1 );
    const TFilterCoeff* taps      = ff + ( phase << 2 );
    const Pel*          window    = refMain + wholeOffs;   // window[col] is the first of the four filter inputs
    Pel*                outRow    = pDstBuf + row * dstStride;

    for( int col = 0; col < width; ++col )
    {
      const Pel s0 = window[col    ];
      const Pel s1 = window[col + 1];
      const Pel s2 = window[col + 2];
      const Pel s3 = window[col + 3];

      Pel& out = outRow[col];
      out = static_cast<Pel>((static_cast<int>(taps[0] * s0) + static_cast<int>(taps[1] * s1) + static_cast<int>(taps[2] * s2) + static_cast<int>(taps[3] * s3) + 32) >> 6);

      if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
      {
        out = ClipPel( out, clpRng );
      }
    }
  }
}
/** Function for deriving the simplified angular intra predictions.
*
* This function derives the prediction samples for the angular mode based on the prediction direction indicated by
* the prediction mode index. The prediction direction is given by the displacement of the bottom row of the block and
* the reference row above the block in the case of vertical prediction or displacement of the rightmost column
* of the block and reference column left from the block in the case of the horizontal prediction. The displacement
* is signalled at 1/32 pixel accuracy. When projection of the predicted pixel falls in between reference samples,
* the predicted value for the pixel is interpolated from the reference samples (4-tap filter for luma, 2-tap linear
* filter otherwise). All reference samples are taken from the extended main reference.
*
* Horizontal modes are predicted as vertical modes with swapped dimensions into a temporary buffer, which is
* transposed into pDst at the end.
*
* \param pSrc reference samples; row 0 is read as the above reference, column 0 as the left reference
* \param pDst destination block
* \param channelType selects the luma or the chroma fractional interpolation path
* \param dirMode intra mode, must satisfy DC_IDX < dirMode < NUM_LUMA_MODE
* \param clpRng clipping range, used for the cubic filter output and for PDPC of pure horizontal / vertical modes
* \param sps not referenced in this function body
* \param multiRefIdx index of the reference line; offsets the reference pointers and scales the first displacement
* \param useFilteredPredSamples not referenced in this function body
* \param doPDPC in/out: PDPC request of the caller; cleared here for negative angles and when the derived
* angular scale is negative
* \param useISP when set, cuSize instead of the pDst dimensions drives the wide-angle mapping and the cubic filter is forced
* \param cuSize dimensions of the coding unit block (only read when useISP is set)
*/
//NOTE: Bit-Limit - 25-bit source
void IntraPrediction::xPredIntraAng( const CPelBuf &pSrc, PelBuf &pDst, const ChannelType channelType, const uint32_t dirMode, const ClpRng& clpRng, const SPS& sps,
int multiRefIdx,
const bool useFilteredPredSamples ,
bool &doPDPC,
const bool useISP,
const Size cuSize
)
{
int width =int(pDst.width);
int height=int(pDst.height);
CHECK_RECOVERABLE( !( dirMode > DC_IDX && dirMode < NUM_LUMA_MODE ), "Invalid intra dir" );
// wide-angle remapping; ISP sub-partitions use the dimensions of the whole CU block
int predMode = useISP ? getWideAngle( cuSize.width, cuSize.height, dirMode ) : getWideAngle( width, height, dirMode );
// modes from DIA_IDX upwards are handled as "vertical" (main reference above), the others as "horizontal"
const bool bIsModeVer = predMode >= DIA_IDX;
// signed distance of the mode to the pure vertical / horizontal mode
const int intraPredAngleMode = (bIsModeVer) ? predMode - VER_IDX : -(predMode - HOR_IDX);
const int absAngMode = abs(intraPredAngleMode);
const int signAng = intraPredAngleMode < 0 ? -1 : 1;
// Set bitshifts and scale the angle parameter to block size
// angTable: displacement per row in 1/32 sample units, indexed by the absolute mode distance
static const int angTable[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };
static const int invAngTable[32] = {
0, 16384, 8192, 5461, 4096, 2731, 2048, 1638, 1365, 1170, 1024, 910, 819, 712, 630, 565,
512, 468, 420, 364, 321, 287, 256, 224, 191, 161, 128, 96, 64, 48, 32, 16
}; // (512 * 32) / Angle
int invAngle = invAngTable[absAngMode];
int absAng = angTable [absAngMode];
int intraPredAngle = signAng * absAng;
Pel* refMain;
Pel* refSide;
Pel refAbove[2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
Pel refLeft [2 * MAX_CU_SIZE + 3 + 33 * MAX_REF_LINE_IDX];
// Initialize the Main and Left reference array.
if (intraPredAngle < 0)
{
// Negative angle: both references are stored with an offset so that the main reference can be addressed
// with negative indices.
for (int x = 0; x <= width + 1 + multiRefIdx; x++)
{
refAbove[x + height] = pSrc.at(x, 0);
}
for (int y = 0; y <= height + 1 + multiRefIdx; y++)
{
refLeft[y + width] = pSrc.at(0, y);
}
refMain = bIsModeVer ? refAbove + height : refLeft + width;
refSide = bIsModeVer ? refLeft + width : refAbove + height;
// Extend the Main reference to the left.
// Each negative index is filled with the side reference sample it projects onto (inverse angle, 1/512 units),
// limited to the last side sample.
int sizeSide = bIsModeVer ? height : width;
for (int k = -sizeSide; k <= -1; k++)
{
refMain[k] = refSide[std::min((-k * invAngle + 256) >> 9, sizeSide)];
}
}
else
{
// Non-negative angle: copy the complete reference lines without offset.
//for (int x = 0; x <= m_topRefLength + multiRefIdx; x++)
//{
// refAbove[x] = pSrc.at(x, 0);
//}
memcpy( refAbove, pSrc.buf, ( m_topRefLength + multiRefIdx + 1 ) * sizeof( Pel ) );
for (int y = 0; y <= m_leftRefLength + multiRefIdx; y++)
{
refLeft[y] = pSrc.at(0, y);
}
refMain = bIsModeVer ? refAbove : refLeft;
refSide = bIsModeVer ? refLeft : refAbove;
// Extend main reference to right using replication
// the number of padded samples grows with the reference line index and the aspect ratio
const int log2Ratio = getLog2(width) - getLog2(height);
const int s = std::max<int>(0, bIsModeVer ? log2Ratio : -log2Ratio);
const int maxIndex = (multiRefIdx << s) + 2;
const int refLength = bIsModeVer ? m_topRefLength : m_leftRefLength;
const Pel val = refMain[refLength + multiRefIdx];
for (int z = 1; z <= maxIndex; z++)
{
refMain[refLength + multiRefIdx + z] = val;
}
}
// swap width/height if we are doing a horizontal mode:
// horizontal modes write to tempArray (stride MAX_TB_SIZEY), which is transposed into pDst at the end
Pel tempArray[MAX_TB_SIZEY*MAX_TB_SIZEY];
const ptrdiff_t dstStride = bIsModeVer ? pDst.stride : MAX_TB_SIZEY;
Pel *pDstBuf = bIsModeVer ? pDst.buf : tempArray;
if (!bIsModeVer)
{
std::swap(width, height);
}
// compensate for line offset in reference line buffers
refMain += multiRefIdx;
refSide += multiRefIdx;
if( intraPredAngle == 0 ) // pure vertical or pure horizontal
{
if( doPDPC )
{
const int scale = ( ( getLog2( width ) - 2 + getLog2( height ) - 2 + 2 ) >> 2 );
CHECK_RECOVERABLE(scale < 0 || scale > 31, "PDPC: scale < 0 || scale > 31");
// lev[scale]: number of leading columns that are modified by PDPC, limited to the row width
// NOTE(review): lev has four entries while the check above admits scale values up to 31 - the index is only
// safe as long as the block sizes keep scale below 4; confirm against the supported block sizes.
const int lev[4]={std::min(3,width),std::min(6,width),std::min(12,width),std::min(24,width)};
const Pel topLeft = pSrc.at(0, 0);
for( int y = 0; y < height; y++ )
{
const Pel left = refSide[y + 1];
Pel *line = &pDstBuf[y * dstStride];
// leading columns: main reference sample plus a weighted gradient of the side reference, clipped
for( int x = 0; x < lev[scale]; x++ )
{
int wL = 32 >> std::min( 31, ( ( x << 1 ) >> scale ) );
*line++ = ClipPel( ( wL * ( left - topLeft ) + ( refMain[x + 1] << 6 ) + 32 ) >> 6, clpRng );
}
// remaining columns: plain copy of the main reference
memcpy( line, refMain + lev[scale] + 1, ( width - lev[scale] ) * sizeof( Pel ) );
}
}
else
{
// without PDPC every row is a copy of the main reference
for( int y = 0; y < height; y++ )
{
memcpy( pDstBuf + y * dstStride, refMain + 1, width * sizeof( Pel ) );
}
}
}
else
{
Pel *pDsty=pDstBuf;
// displacement is not a multiple of 32: fractional positions occur and interpolation is required
if( !(0 == (absAng & 0x1F)) )
{
if( isLuma(channelType) )
{
int deltaPos = intraPredAngle * (1 + multiRefIdx);
bool interpolationFlag = false, filterFlag = false;
// distance of the mode to the nearer of the pure horizontal / vertical mode
const int diff = std::min<int>( abs( predMode - HOR_IDX ), abs( predMode - VER_IDX ) );
const int log2Size = ((getLog2(width) + getLog2(height)) >> 1);
CHECKD( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );
filterFlag = (diff > m_aucIntraFilter[channelType][log2Size]);
if( filterFlag )
{
const bool isRefFilter = 0 == ( absAng & 0x1F );
interpolationFlag = !isRefFilter;
}
// cubic filter for ISP, for a non-zero reference line index and whenever interpolationFlag is not set;
// the smoothing filter g_intraGaussFilter otherwise
const bool useCubicFilter = useISP ? true : ( !interpolationFlag || multiRefIdx > 0 );
const TFilterCoeff *f = (useCubicFilter) ? InterpolationFilter::getChromaFilterTable(0) : g_intraGaussFilter[0];
// kernel selection by row width: multiple of 8, multiple of 4, anything else
if( ( width & 7 ) == 0 )
{
IntraPredAngleCore8(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle,f,useCubicFilter,clpRng);
}
else if( ( width & 3 ) == 0 )
{
IntraPredAngleCore4(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle,f,useCubicFilter,clpRng);
}
else
{
// row widths that are not a multiple of 4 are only expected with ISP; scalar 4-tap filtering inline
CHECK_RECOVERABLE( !useISP, "should not happen" );
for (int y = 0; y<height; y++ )
{
const int deltaInt = deltaPos >> 5;
const int deltaFract = deltaPos & ( 32 - 1 );
Pel p[4];
int refMainIndex = deltaInt + 1;
const TFilterCoeff *ff = &f[deltaFract << 2];
for( int x = 0; x < width; x++, refMainIndex++ )
{
p[0] = refMain[refMainIndex - 1];
p[1] = refMain[refMainIndex ];
p[2] = refMain[refMainIndex + 1];
p[3] = refMain[refMainIndex + 2];
pDstBuf[y*dstStride + x] = static_cast<Pel>((static_cast<int>(ff[0] * p[0]) + static_cast<int>(ff[1] * p[1]) + static_cast<int>(ff[2] * p[2]) + static_cast<int>(ff[3] * p[3]) + 32) >> 6);
if( useCubicFilter ) // only cubic filter has negative coefficients and requires clipping
{
pDstBuf[y*dstStride + x] = ClipPel( pDstBuf[y*dstStride + x], clpRng );
}
}
deltaPos += intraPredAngle;
}
}
}
else
{
// non-luma: 2-tap linear interpolation, kernel selected by row width
int deltaPos = intraPredAngle * (1 + multiRefIdx);
if ( width >=8 )
{
IntraPredAngleChroma8(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
}
else if( width == 4 )
{
IntraPredAngleChroma4(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
}
else
{
IntraPredAngleChroma(pDstBuf,dstStride,refMain,width,height,deltaPos,intraPredAngle);
}
}
}
else
{
// displacement is a multiple of 32: every row is a shifted copy of the main reference
for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
{
const int deltaInt = deltaPos >> 5;
// Just copy the integer samples
memcpy(pDsty,refMain + deltaInt + 1,width*sizeof(Pel));
}
}
// PDPC for angular modes, applied row by row on top of the prediction generated above
// NOTE(review): angularScale and the doPDPC decision do not depend on y but are recomputed for every row;
// deltaPos is advanced by the loop header but not read in the loop body.
pDsty=pDstBuf;
for (int y = 0, deltaPos = intraPredAngle * (1 + multiRefIdx); y<height; y++, deltaPos += intraPredAngle, pDsty += dstStride)
{
int angularScale = 0;
if( intraPredAngle < 0 )
{
// no PDPC for negative angles; this also updates the caller's flag
doPDPC = false;
}
else if( intraPredAngle > 0 )
{
const int sideSize = predMode >= DIA_IDX ? pDst.height : pDst.width;
const int maxScale = 2;
angularScale = std::min(maxScale, getLog2(sideSize) - (getLog2(3 * invAngle - 2) - 8));
doPDPC &= angularScale >= 0;
}
if( doPDPC )
{
// only the first 3 << angularScale columns are modified; the weight is halved with growing x
int invAngleSum = 256;
for (int x = 0; x < std::min(3 << angularScale, width); x++)
{
invAngleSum += invAngle;
int wL = 32 >> (2 * x >> angularScale);
// side reference sample reached by following the prediction direction backwards
Pel left = refSide[y + (invAngleSum >> 9) + 1];
pDsty[x] = pDsty[x] + ((wL * (left - pDsty[x]) + 32) >> 6);
}
}
}
}
// Flip the block if this is the horizontal mode
if( !bIsModeVer )
{
pDst.transposedFrom( CPelBuf( pDstBuf, dstStride, width, height ) );
}
}
void IntraPrediction::xPredIntraBDPCM(const CPelBuf &pSrc, PelBuf &pDst, const uint32_t dirMode, const ClpRng& clpRng )
{
const int wdt = pDst.width;
const int hgt = pDst.height;
const ptrdiff_t strideP = pDst.stride;
const ptrdiff_t strideS = pSrc.stride;
CHECK_RECOVERABLE( !( dirMode == 1 || dirMode == 2 ), "Incorrect BDPCM mode parameter." );
Pel* pred = &pDst.buf[0];
if( dirMode == 1 )
{
Pel val;
for( int y = 0; y < hgt; y++ )
{
val = pSrc.buf[(y + 1) * strideS];
for( int x = 0; x < wdt; x++ )
{
pred[x] = val;
}
pred += strideP;
}
}
else
{
for( int y = 0; y < hgt; y++ )
{
for( int x = 0; x < wdt; x++ )
{
pred[x] = pSrc.buf[x + 1];
}
pred += strideP;
}
}
}
/** Combined inter/intra prediction: blends an intra prediction into the prediction already stored in predUnit.
 *
 * For every processed component the intra prediction is generated into one of the m_piYuvExt buffers and then
 * mixed with the existing samples of predUnit. The weights depend on how many of the left and above neighbouring
 * CUs are intra coded: ( wMerge * existing + wIntra * intra + 2 ) >> 2 with wIntra + wMerge == 4.
 * Chroma is only processed when it is enabled and the chroma block is wider than 2 samples.
 *
 * \param predUnit  in/out: existing prediction per component, replaced by the blended result
 * \param cu        coding unit being predicted
 */
void IntraPrediction::predBlendIntraCiip( PelUnitBuf &predUnit, const CodingUnit &cu )
{
  int maxCompID = 1;

  if( isChromaEnabled( cu.chromaFormat ) && cu.chromaSize().width > 2 )
  {
    maxCompID = MAX_NUM_COMPONENT;
  }

  for( int currCompID = 0; currCompID < maxCompID; currCompID++ )
  {
    // typed component id; also gives the profiler macro below a declared identifier to refer to
    const ComponentID compID = ComponentID( currCompID );

    PelBuf&         pred      = predUnit.bufs[ currCompID ];
    const int       width     = pred.width;
    const int       height    = pred.height;
    const ptrdiff_t srcStride = width;
    const ptrdiff_t dstStride = pred.stride;
    Pel*            dstBuf    = pred.buf;

    // reference sample filtering is only considered for luma
    const bool isUseFilter = currCompID == 0 && IntraPrediction::useFilteredIntraRefSamples( COMPONENT_Y, cu, cu );
    // the intra prediction is written to the buffer with the opposite index of the filter flag
    Pel*       srcBuf      = m_piYuvExt[!isUseFilter];
    PelBuf     srcAreaBuf( srcBuf, srcStride, width, height );

    {
      PROFILER_SCOPE_AND_STAGE_EXT( 1, g_timeProfiler, P_INTRAPRED, *cu.cs, compID );

      initIntraPatternChType( cu.firstTU, cu.blocks[currCompID], isUseFilter );
      predIntraAng( compID, srcAreaBuf, cu, isUseFilter );
    }

    CHECKD( width == 2, "Width of 2 is not supported" );

    // the neighbours are always looked up at luma positions
    const Position posBL = cu.Y().bottomLeft();
    const Position posTR = cu.Y().topRight();

    const CodingUnit* cuLeft  = cu.cs->getCURestricted( posBL.offset( -1, 0 ), cu, CHANNEL_TYPE_LUMA, cu.left );
    const CodingUnit* cuAbove = cu.cs->getCURestricted( posTR.offset( 0, -1 ), cu, CHANNEL_TYPE_LUMA, cu.above );

    const bool isNeigh0Intra = cuLeft  && ( CU::isIntra( *cuLeft ) );
    const bool isNeigh1Intra = cuAbove && ( CU::isIntra( *cuAbove ) );

    // each intra coded neighbour moves one quarter of the weight from the existing prediction to the intra one
    const int wIntra = 3 - !isNeigh0Intra - !isNeigh1Intra;
    const int wMerge = 3 - !!isNeigh0Intra - !!isNeigh1Intra;

    for( int y = 0; y < height; y++ )
    {
      Pel*       dstRow = dstBuf + y * dstStride;
      const Pel* srcRow = srcBuf + y * srcStride;

      // samples are processed in groups of four
      for( int x = 0; x < width; x += 4 )
      {
        for( int i = 0; i < 4; i++ )
        {
          dstRow[x + i] = ( wMerge * dstRow[x + i] + wIntra * srcRow[x + i] + 2 ) >> 2;
        }
      }
    }
  }
}
// Forward declarations of two helpers; their definitions are not part of this excerpt and are expected later
// in this translation unit.
// NOTE(review): judging by name and parameters they report the availability of the neighbouring units above /
// left of the block starting at posLT - confirm at the definitions.
inline int isAboveAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
const uint32_t uiNumUnitsInPU, const uint32_t unitWidth);
inline int isLeftAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
const uint32_t uiNumUnitsInPU, const uint32_t unitWidth);
void IntraPrediction::initIntraPatternChType(const TransformUnit &tu, const CompArea &area, const bool bFilterRefSamples)
{
CHECK_RECOVERABLE( area.width == 2, "Width of 2 is not supported" );
const CodingStructure& cs = *tu.cu->cs;
Pel *refBufUnfiltered = m_piYuvExt[PRED_BUF_UNFILTERED];
Pel *refBufFiltered = m_piYuvExt[PRED_BUF_FILTERED];
setReferenceArrayLengths( area );
// ----- Step 1: unfiltered reference samples -----
xFillReferenceSamples( cs.picture->getRecoBuf( area ), refBufUnfiltered, area, tu );
// ----- Step 2: filtered reference samples -----
if( bFilterRefSamples )
{
xFilterReferenceSamples( refBufUnfiltered, refBufFiltered, area, *cs.sps , tu.cu->multiRefIdx() );
}
}
void IntraPrediction::initIntraPatternChTypeISP(const CodingUnit& cu, const CompArea& area, PelBuf& recBuf)
{
const CodingStructure& cs = *cu.cs;
const Position &posLT = area.pos();
bool isLeftAvail = nullptr != cs.getCURestricted( posLT.offset( -1, 0 ), cu, CH_L, posLT.x == cu.lx() ? cu.left : &cu );
bool isAboveAvail = nullptr != cs.getCURestricted( posLT.offset( 0, -1 ), cu, CH_L, posLT.y == cu.ly() ? cu.left : &cu );
// ----- Step 1: unfiltered reference samples -----
if( cu.blocks[area.compID()].x == area.x && cu.blocks[area.compID()].y == area.y )
{
Pel* refBufUnfiltered = m_piYuvExt[PRED_BUF_UNFILTERED];
// With the first subpartition all the CU reference samples are fetched at once in a single call to xFillReferenceSamples
if( cu.ispMode() == HOR_INTRA_SUBPARTITIONS )
{
m_leftRefLength = cu.Y().height << 1;
m_topRefLength = cu.Y().width + area.width;
}
else //if (cu.ispMode() == VER_INTRA_SUBPARTITIONS)
{
m_leftRefLength = cu.Y().height + area.height;
m_topRefLength = cu.Y().width << 1;
}
const int srcStride = m_topRefLength + 1;
const int srcHStride = m_leftRefLength + 1;
m_pelBufISP[0] = m_pelBufISPBase[0] = PelBuf(m_piYuvExt[PRED_BUF_UNFILTERED], srcStride, srcHStride);
m_pelBufISP[1] = m_pelBufISPBase[1] = PelBuf(m_piYuvExt[PRED_BUF_FILTERED], srcStride, srcHStride);
xFillReferenceSamples( cs.picture->getRecoBuf( cu.Y() ), refBufUnfiltered, cu.Y(), isLuma( area.compID() ) ? cu.firstTU : *cu.lastTU );
// After having retrieved all the CU reference samples, the number of reference samples is now adjusted for the current subpartition
m_topRefLength = cu.blocks[area.compID()].width + area.width;
m_leftRefLength = cu.blocks[area.compID()].height + area.height;
}
else
{
//Now we only need to fetch the newly available reconstructed samples from the previously coded TU
Position tuPos = area;
tuPos.relativeTo(cu.Y());
m_pelBufISP[0] = m_pelBufISPBase[0].subBuf(tuPos, area.size());
m_pelBufISP[1] = m_pelBufISPBase[1].subBuf(tuPos, area.size());
PelBuf& dstBuf = m_pelBufISP[0];
m_topRefLength = cu.blocks[area.compID()].width + area.width;
m_leftRefLength = cu.blocks[area.compID()].height + area.height;
const int predSizeHor = m_topRefLength;
const int predSizeVer = m_leftRefLength;
if (cu.ispMode() == HOR_INTRA_SUBPARTITIONS)
{
Pel* src = recBuf.bufAt(0, -1);
Pel* dst = dstBuf.bufAt(1, 0);
for (int i = 0; i < area.width; i++)
{
dst[i] = src[i];
}
Pel sample = src[area.width - 1];
dst += area.width;
for (int i = 0; i < predSizeHor - area.width; i++)
{
dst[i] = sample;
}
if (!isLeftAvail) //if left is not avaible, then it is necessary to fetch these samples for each subpartition
{
Pel* dst = dstBuf.bufAt(0, 0);
Pel sample = src[0];
for (int i = 0; i < predSizeVer + 1; i++)
{
*dst = sample;
dst += dstBuf.stride;
}
}
}
else
{
Pel* src = recBuf.bufAt(-1, 0);
Pel* dst = dstBuf.bufAt(0, 1);
for (int i = 0; i < area.height; i++)
{
*dst = *src;
src += recBuf.stride;
dst += dstBuf.stride;
}
Pel sample = src[-recBuf.stride];
for (int i = 0; i < predSizeVer - area.height; i++)
{
*dst = sample;
dst += dstBuf.stride;
}
if (!isAboveAvail) //if above is not avaible, then it is necessary to fetch these samples for each subpartition
{
Pel* dst = dstBuf.bufAt(0, 0);
Pel sample = recBuf.at(-1, 0);
for (int i = 0; i < predSizeHor + 1; i++)
{
dst[i] = sample;
}
}
}
}
}
// Builds the unfiltered intra reference border of `area` in refBufUnfiltered from the reconstructed
// samples in recoBuf.
//
// Destination layout (row stride predStride = m_topRefLength + 1 + multiRefIdx):
//   - row 0 holds the top-left corner sample(s) followed by the above / above-right samples,
//   - the left / below-left samples are stored one per row in column 0 of the following rows.
//
// recoBuf.buf is treated as the top-left sample of the block: all neighbours are read at negative
// row/column offsets relative to it.
//
// Inputs taken from members: m_topRefLength / m_leftRefLength (number of above / left reference samples).
// Members written: m_neighborSize[] and m_lastCUidx (cached neighbourhood analysis, see step 1).
//
// Samples that are not available are replaced by padding with the nearest available sample, or by the
// mid-grey value 1 << (bitDepth - 1) when no neighbour is available at all.
void IntraPrediction::xFillReferenceSamples( const CPelBuf &recoBuf, Pel* refBufUnfiltered, const CompArea &area, const TransformUnit &tu )
{
const ChannelType chType = toChannelType( area.compID() );
const CodingUnit &cu = *tu.cu;
const CodingStructure &cs = *cu.cs;
const SPS &sps = *cs.sps;
const PreCalcValues &pcv = *cs.pcv;
// the multi-reference-line index is only applied to the luma component
const int multiRefIdx = (area.compID() == COMPONENT_Y) ? cu.multiRefIdx() : 0;
const int tuWidth = area.width;
const int tuHeight = area.height;
const int predSize = m_topRefLength;
const int predHSize = m_leftRefLength;
const int predStride = predSize + 1 + multiRefIdx;
const int csx = getChannelTypeScaleX( chType, pcv.chrFormat );
const int csy = getChannelTypeScaleY( chType, pcv.chrFormat );
// availability is tracked in units of the minimum CU size, scaled to the current channel
const int unitWidth = pcv.minCUWidth >> csx;
const int unitHeight = pcv.minCUHeight >> csy;
const int totalAboveUnits = (predSize + (unitWidth - 1)) / unitWidth;
const int totalLeftUnits = (predHSize + (unitHeight - 1)) / unitHeight;
const int totalUnits = totalAboveUnits + totalLeftUnits + 1; //+1 for top-left
const int numAboveUnits = tuWidth / unitWidth;
const int numLeftUnits = tuHeight / unitHeight;
const int numAboveRightUnits = totalAboveUnits - numAboveUnits;
const int numLeftBelowUnits = totalLeftUnits - numLeftUnits;
CHECK_RECOVERABLE( numAboveUnits <= 0 || numLeftUnits <= 0 || numAboveRightUnits <= 0 || numLeftBelowUnits <= 0, "Size not supported" );
// ----- Step 1: analyze neighborhood -----
// m_neighborSize[0]: top-left corner available (0/1)
// m_neighborSize[1]: number of available units above + above-right
// m_neighborSize[2]: number of available units left + below-left
if( m_lastCUidx == tu.cu->idx && area.compID() != getFirstComponentOfChannel( tu.cu->chType() ) )
{
// same CU as the previous call and not the first component of the channel:
// the values left in m_neighborSize by the previous call are reused unchanged
}
else
{
const Position posLT = area.pos();
// non-zero masked coordinate: presumably the position is not on the left / top CTU boundary - TODO confirm mask semantics
const bool sameCTUx = !!( posLT.x & ( pcv.maxCUWidthMask >> csx ) );
const bool sameCTUy = !!( posLT.y & ( pcv.maxCUHeightMask >> csy ) );
const bool sameCTU = sameCTUx && sameCTUy;
// when both flags are set the corner is taken as available without a lookup
m_neighborSize[0] = sameCTU ? 1 : !!cu.cs->getCURestricted( posLT.offset( -1, -1 ), cu, chType, cu.left ? cu.left : cu.above );
if( cu.above || area.y > tu.cu->blocks[chType].y )
{
// the units directly above are available; only the above-right ones need to be probed
m_neighborSize[1] = numAboveUnits;
Position posAR{ posLT.x + ( PosType ) area.width, posLT.y };
m_neighborSize[1] += isAboveAvailable( tu, chType, posAR, numAboveRightUnits, unitWidth );
}
else
m_neighborSize[1] = 0;
if( cu.left || area.x > tu.cu->blocks[chType].x )
{
// the units directly to the left are available; only the below-left ones need to be probed
m_neighborSize[2] = numLeftUnits;
Position posLB{ posLT.x, posLT.y + ( PosType ) area.height };
m_neighborSize[2] += isLeftAvailable( tu, chType, posLB, numLeftBelowUnits, unitHeight );
}
else
m_neighborSize[2] = 0;
m_lastCUidx = tu.cu->idx;
}
int numIntraNeighbor = m_neighborSize[0] + m_neighborSize[1] + m_neighborSize[2];
// ----- Step 2: fill reference samples (depending on neighborhood) -----
const Pel* srcBuf = recoBuf.buf;
const ptrdiff_t srcStride = recoBuf.stride;
Pel* ptrDst = refBufUnfiltered;
const Pel* ptrSrc;
// mid-range sample value for the channel's bit depth
const Pel valueDC = 1 << (sps.getBitDepth( chType ) - 1);
if( numIntraNeighbor == 0 )
{
// Fill border with DC value
for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = valueDC; }
for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = valueDC; }
}
else if( numIntraNeighbor == totalUnits )
{
// Fill top-left border and top and top right with rec. samples
ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
for (int j = 0; j <= predSize + multiRefIdx; j++) { ptrDst[j] = ptrSrc[j]; }
// left column, starting one source row below the row copied above
ptrSrc = srcBuf - multiRefIdx * srcStride - (1 + multiRefIdx);
for (int i = 1; i <= predHSize + multiRefIdx; i++) { ptrDst[i*predStride] = *(ptrSrc); ptrSrc += srcStride; }
}
else // reference samples are partially available
{
// Two cases: left column available (corner and top are copied or padded from it),
// or left column unavailable (top is copied, corner and left are padded from it)
if ( m_neighborSize[2] > 0) { // left is available
// Fill left & below-left samples if available (downwards)
ptrSrc = srcBuf - (1 + multiRefIdx);
ptrDst = refBufUnfiltered + (1 + multiRefIdx) * predStride;
int tmpSize = m_neighborSize[2] * unitHeight;
tmpSize = std::min(tmpSize, predHSize);
for (int i = 0; i < tmpSize; i++) {
ptrDst[i * predStride] = ptrSrc[i * srcStride];
}
// pad the remaining rows with the last copied left sample
Pel tmpPixel = ptrDst[(tmpSize - 1) * predStride];
for (int i = tmpSize; i < predHSize; i++) {
ptrDst[i * predStride] = tmpPixel;
}
// Fill top-left sample(s) if available
if ( m_neighborSize[0]) {
ptrSrc = srcBuf - (1 + multiRefIdx) * srcStride - (1 + multiRefIdx);
ptrDst = refBufUnfiltered;
memcpy(ptrDst, ptrSrc, sizeof(Pel) * (multiRefIdx + 1));
for (int i = 1; i <= multiRefIdx; i++) {
ptrDst[i * predStride] = ptrSrc[i * srcStride];
}
} else { // pad the corner with the first left sample
ptrSrc = srcBuf - (1 + multiRefIdx); // left pixel
ptrDst = refBufUnfiltered;
tmpPixel = ptrSrc[0];
ptrDst[0] = tmpPixel;
for (int i = 1; i <= multiRefIdx; i++) {
ptrDst[i] = tmpPixel;
ptrDst[i * predStride] = tmpPixel;
}
}
// Fill above & above-right samples if available (left-to-right)
if ( m_neighborSize[1]) {
ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
ptrDst = refBufUnfiltered + 1 + multiRefIdx;
tmpSize = m_neighborSize[1] * unitWidth;
tmpSize = std::min(tmpSize, predSize);
memcpy(ptrDst, ptrSrc, tmpSize * sizeof(Pel));
// pad the rest of the row with the last copied above sample
Pel tmpPixel = ptrDst[tmpSize - 1];
for (int i = tmpSize; i < predSize; i++) {
ptrDst[i] = tmpPixel;
}
} else { // all not available, pad
ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
ptrDst = refBufUnfiltered + 1 + multiRefIdx;
// replicate the destination sample just left of the above row (last corner entry written above)
Pel tmpPixel = ptrDst[-1];
std::fill_n(ptrDst, predSize, tmpPixel);
}
} else { // left is not available, top must be available
// Fill above & above-right samples (left-to-right)
ptrSrc = srcBuf - srcStride * (1 + multiRefIdx);
ptrDst = refBufUnfiltered + 1 + multiRefIdx;
int tmpSize = m_neighborSize[1] * unitWidth;
tmpSize = std::min(tmpSize, predSize);
memcpy(ptrDst, ptrSrc, tmpSize * sizeof(Pel));
// pad
Pel tmpPixel = ptrDst[tmpSize - 1];
for (int i = tmpSize; i < predSize; i++) {
ptrDst[i] = tmpPixel;
}
// the first above sample is the padding value for the corner and the whole left column
tmpPixel = ptrSrc[0];
// pad top-left sample(s)
ptrDst = refBufUnfiltered;
ptrDst[0] = tmpPixel;
for (int i = 1; i <= multiRefIdx; i++) {
ptrDst[i] = tmpPixel;
ptrDst[i * predStride] = tmpPixel;
}
// pad left sample(s)
ptrDst = refBufUnfiltered + (1 + multiRefIdx) * predStride;
for (int i = 0; i < predHSize; i++) {
ptrDst[i * predStride] = tmpPixel;
}
}
}
}
void IntraPrediction::xFilterReferenceSamples( const Pel* refBufUnfiltered, Pel* refBufFiltered, const CompArea &area, const SPS &sps, int multiRefIdx, ptrdiff_t stride ) const
{
if (area.compID() != COMPONENT_Y)
{
multiRefIdx = 0;
}
const int predSize = m_topRefLength + multiRefIdx;
const int predHSize = m_leftRefLength + multiRefIdx;
const ptrdiff_t predStride = stride == 0 ? predSize + 1 : stride;
// Regular reference sample filter
const Pel *piSrcPtr = refBufUnfiltered + (predStride * predHSize); // bottom left
Pel *piDestPtr = refBufFiltered + (predStride * predHSize); // bottom left
// bottom left (not filtered)
*piDestPtr = *piSrcPtr;
piDestPtr -= predStride;
piSrcPtr -= predStride;
//left column (bottom to top)
for( int i = 1; i < predHSize; i++, piDestPtr -= predStride, piSrcPtr -= predStride)
{
*piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[-predStride] + 2) >> 2;
}
//top-left
*piDestPtr = (piSrcPtr[predStride] + 2 * piSrcPtr[0] + piSrcPtr[1] + 2) >> 2;
piDestPtr++;
piSrcPtr++;
//top row (left-to-right)
for( uint32_t i=1; i < predSize; i++, piDestPtr++, piSrcPtr++ )
{
*piDestPtr = (piSrcPtr[1] + 2 * piSrcPtr[0] + piSrcPtr[-1] + 2) >> 2;
}
// top right (not filtered)
*piDestPtr=*piSrcPtr;
}
// Returns true when the prediction angle belonging to predMode has no fractional part,
// i.e. the table entry for the mode is a multiple of 32.
static bool getUseFilterRef( const int predMode )
{
  static const int angTable[32] = { 0, 1, 2, 3, 4, 6, 8, 10, 12, 14, 16, 18, 20, 23, 26, 29, 32, 35, 39, 45, 51, 57, 64, 73, 86, 102, 128, 171, 256, 341, 512, 1024 };

  // signed distance of the mode to the pure vertical (modes >= DIA_IDX) or pure horizontal direction
  int angleOffset;
  if( predMode >= DIA_IDX )
  {
    angleOffset = predMode - VER_IDX;
  }
  else
  {
    angleOffset = -( predMode - HOR_IDX );
  }

  const int tableIdx = abs( angleOffset );
  const int angle    = angTable[tableIdx];

  // the low five bits hold the fractional part of the angle
  return ( angle & 0x1F ) == 0;
}
// Decides whether the filtered reference samples are to be used for predicting component compID
// of the given TU area.
//
// Only the conditions below are evaluated here; the further high-level conditions (intra smoothing
// disabled, chroma, ISP, MIP) are not checked by this function.
bool IntraPrediction::useFilteredIntraRefSamples( const ComponentID &compID, const CodingUnit &cu, const UnitArea &tuArea )
{
  const ChannelType chType = toChannelType( compID );

  // no filtering with multiple reference lines or BDPCM
  if( cu.multiRefIdx() )
  {
    return false;
  }
  if( cu.bdpcmMode() )
  {
    return false;
  }

  // prediction-mode dependent part
  const int dirMode = PU::getFinalIntraMode( cu, chType );
  if( dirMode == DC_IDX )
  {
    return false;
  }

  const auto& blk = tuArea.blocks[compID];
  if( dirMode == PLANAR_IDX )
  {
    // planar: filter only blocks with more than 32 samples
    return blk.area() > 32;
  }

  // angular: the mode has to be far enough from pure horizontal/vertical for the block size,
  // and the angle has to be an integer-sample one
  const int predMode   = getWideAngle( blk.width, blk.height, dirMode );
  const int distHor    = abs( predMode - HOR_IDX );
  const int distVer    = abs( predMode - VER_IDX );
  const int minDist    = std::min<int>( distHor, distVer );
  const int log2Size   = ( getLog2( blk.width ) + getLog2( blk.height ) ) >> 1;
  CHECKD( log2Size >= MAX_INTRA_FILTER_DEPTHS, "Size not supported" );

  if( !( minDist > m_aucIntraFilter[ chType ][ log2Size ] ) )
  {
    return false;
  }
  return getUseFilterRef( predMode );
}
// Returns the transform unit of cu that covers pos in channel chType.
// A CU with a single TU returns that TU directly. Otherwise the TU list is walked from firstTU and
// the first TU whose right and bottom edges both lie beyond pos is returned.
// NOTE(review): the walk has no end-of-list check, so pos is assumed to be covered by some TU of cu.
static inline TransformUnit const* getTU( const CodingUnit& cu, const Position& pos, const ChannelType chType )
{
  const TransformUnit* cand = &cu.firstTU;

  if( !cand->next )
  {
    return cand;
  }

  for( ;; cand = cand->next )
  {
    const bool beyondX = cand->blocks[chType].x + cand->blocks[chType].width  > pos.x;
    const bool beyondY = cand->blocks[chType].y + cand->blocks[chType].height > pos.y;
    if( beyondX && beyondY )
    {
      return cand;
    }
  }
}
int isAboveAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
const uint32_t uiNumUnitsInPU, const uint32_t unitWidth)
{
const CodingUnit &cu = *tu.cu;
const CodingStructure &cs = *cu.cs;
int maxDx = uiNumUnitsInPU * unitWidth;
Position refPos = posLT.offset(0, -1);
const TransformUnit *pcTUAbove = nullptr;
const int currTUIdx = tu.idx;
int dx = 0;
while( dx < maxDx )
{
const CodingUnit* cuAbove = cs.getCURestricted( refPos, cu, chType, pcTUAbove ? nullptr : cu.above );
if( !cuAbove ) break;
pcTUAbove = getTU( *cuAbove, refPos, chType );
if( cuAbove->ctuData == cu.ctuData && pcTUAbove->idx >= currTUIdx ) break;
int diff = ( int ) pcTUAbove->blocks[chType].width - refPos.x + pcTUAbove->blocks[chType].x;
dx += diff;
refPos.x += diff;
}
int neighborSize = dx / unitWidth;
neighborSize = std::min<int>( neighborSize, uiNumUnitsInPU );
return neighborSize;
}
int isLeftAvailable(const TransformUnit &tu, const ChannelType &chType, const Position &posLT,
const uint32_t uiNumUnitsInPU, const uint32_t unitHeight)
{
const CodingUnit &cu = *tu.cu;
const CodingStructure &cs = *cu.cs;
int maxDy = uiNumUnitsInPU * unitHeight;
Position refPos = posLT.offset(-1, 0);
const TransformUnit *pcTULeft = nullptr;
int currTUIdx = tu.idx;
int dy = 0;
while( dy < maxDy )
{
const CodingUnit* cuLeft = cs.getCURestricted( refPos, cu, chType, pcTULeft ? nullptr : cu.left );
if( !cuLeft ) break;
pcTULeft = getTU( *cuLeft, refPos, chType );
if( cuLeft->ctuData == cu.ctuData && pcTULeft->idx >= currTUIdx ) break;
int diff = ( int ) pcTULeft->blocks[chType].height - refPos.y + pcTULeft->blocks[chType].y;
dy += diff;
refPos.y += diff;
}
int neighborSize = dy / unitHeight;
neighborSize = std::min<int>( neighborSize, uiNumUnitsInPU );
return neighborSize;
}
// LumaRecPixels
// Writes a down-sampled copy of the reconstructed luma that is co-located with chromaArea into
// m_piYuvExt[1], together with the down-sampled neighbouring row above and column to the left
// when those are available.
//
// Destination layout: the block itself starts at m_piYuvExt[1] + iDstStride + 1 (pDst0); the above
// row is written one destination row higher (pDst0 - iDstStride) and the left column one sample to
// the left (pDst0 - 1). The row stride is 2 * MAX_TU_SIZE_FOR_PROFILE + 1 for the MDLM_L / MDLM_T
// modes and MAX_TU_SIZE_FOR_PROFILE + 1 otherwise.
//
// The down-sampling filter taps (3-, 5- and 6-tap variants) default to the 4:2:0 values and are
// overwritten for 4:2:2 and 4:4:4. Which variant is used depends on the position (first row of a
// CTU for the above row) and on the SPS collocated-chroma flag.
NO_THREAD_SANITIZE void IntraPrediction::xGetLumaRecPixels(const CodingUnit &cu, CompArea chromaArea)
{
int iDstStride = 0;
Pel* pDst0 = 0;
int curChromaMode = cu.intraDir[1];
if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
{
iDstStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1;
pDst0 = m_piYuvExt[1] + iDstStride + 1;
}
else
{
iDstStride = MAX_TU_SIZE_FOR_PROFILE + 1;
pDst0 = m_piYuvExt[1] + iDstStride + 1; //MMLM_SAMPLE_NEIGHBOR_LINES;
}
//assert 420 chroma subsampling
CompArea lumaArea = CompArea( COMPONENT_Y, chromaArea.lumaPos( cu.chromaFormat),
recalcSize( cu.chromaFormat, CHANNEL_TYPE_CHROMA, CHANNEL_TYPE_LUMA, chromaArea.size() ) );//needed for correct pos/size (4x4 Tus)
CHECK_RECOVERABLE( lumaArea.width == chromaArea.width && CHROMA_444 != cu.chromaFormat, "" );
CHECK_RECOVERABLE( lumaArea.height == chromaArea.height && CHROMA_444 != cu.chromaFormat && CHROMA_422 != cu.chromaFormat, "" );
const SizeType uiCWidth = chromaArea.width;
const SizeType uiCHeight = chromaArea.height;
CPelBuf Src = cu.cs->picture->getRecoBuf( lumaArea );
Pel const* pRecSrc0 = Src.bufAt( 0, 0 );
ptrdiff_t iRecStride = Src.stride;
int logSubWidthC = getChannelTypeScaleX(CHANNEL_TYPE_CHROMA, cu.chromaFormat);
int logSubHeightC = getChannelTypeScaleY(CHANNEL_TYPE_CHROMA, cu.chromaFormat);
// luma stride that advances by one chroma row
ptrdiff_t iRecStride2 = iRecStride << logSubHeightC; // TODO: really Height here? not Width?
// horizontal luma step per chroma sample
const int mult = 1 << logSubWidthC ;
const CompArea& area = isChroma( cu.chType() ) ? chromaArea : lumaArea;
const uint32_t uiTuWidth = area.width;
const uint32_t uiTuHeight = area.height;
int iBaseUnitSize = ( 1 << MIN_CU_LOG2 );
const int iUnitWidth = iBaseUnitSize >> getComponentScaleX( area.compID(), cu.chromaFormat );
const int iUnitHeight = iBaseUnitSize >> getComponentScaleY( area.compID(), cu.chromaFormat );
const int iTUWidthInUnits = uiTuWidth / iUnitWidth;
const int iTUHeightInUnits = uiTuHeight / iUnitHeight;
const int iAboveUnits = iTUWidthInUnits;
const int iLeftUnits = iTUHeightInUnits;
const int chromaUnitWidth = iBaseUnitSize >> getComponentScaleX(COMPONENT_Cb, cu.chromaFormat);
const int chromaUnitHeight = iBaseUnitSize >> getComponentScaleY(COMPONENT_Cb, cu.chromaFormat);
const int topTemplateSampNum = 2 * uiCWidth; // for MDLM, the number of template samples is 2W or 2H.
const int leftTemplateSampNum = 2 * uiCHeight;
CHECKD( !( m_topRefLength >= topTemplateSampNum ), "Error!" );
CHECKD( !( m_leftRefLength >= leftTemplateSampNum ), "Error!" );
// only the MDLM mode of the matching direction uses the extended (2W / 2H) template
int totalAboveUnits = (curChromaMode == MDLM_T_IDX) ? (topTemplateSampNum + (chromaUnitWidth - 1)) / chromaUnitWidth : iAboveUnits;
int totalLeftUnits = (curChromaMode == MDLM_L_IDX) ? (leftTemplateSampNum + (chromaUnitHeight - 1)) / chromaUnitHeight : iLeftUnits;
// a neighbour is available if the CU has one, or if chromaArea is not at the CU's left / top edge
const int availlableLeftUnit = ( cu.left || chromaArea.x > cu.blocks[CH_C].x ) ? totalLeftUnits : 0;
const bool bLeftAvaillable = availlableLeftUnit >= iTUHeightInUnits;
const int availlableAboveUnit = ( cu.above || chromaArea.y > cu.blocks[CH_C].y ) ? totalAboveUnits : 0;
const bool bAboveAvaillable = availlableAboveUnit >= iTUWidthInUnits;
Pel* pDst = nullptr;
Pel const* piSrc = nullptr;
bool isFirstRowOfCtu = ( lumaArea.y & ( cu.sps->getCTUSize() - 1) ) == 0;
// offset to the vertically adjacent luma row; 0 for 4:4:4
const ptrdiff_t strOffset = ( CHROMA_444 == cu.chromaFormat ) ? 0 : iRecStride;
// default filter coefficients (used unless overwritten for 4:2:2 / 4:4:4 below)
int c0_3tap = 2, c1_3tap = 1, c2_3tap = 1, offset_3tap = 2, shift_3tap = 2; //sum = 4
int c0_5tap = 1, c1_5tap = 4, c2_5tap = 1, c3_5tap = 1, c4_5tap = 1, offset_5tap = 4, shift_5tap = 3; //sum = 8
int c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 2, c4_6tap = 1, c5_6tap = 1, offset_6tap = 4, shift_6tap = 3; //sum = 8
switch (cu.chromaFormat)
{
case CHROMA_422: //overwrite filter coefficient values for 422
c0_3tap = 2, c1_3tap = 1, c2_3tap = 1, offset_3tap = 2, shift_3tap = 2; //sum = 4
c0_5tap = 0, c1_5tap = 1, c2_5tap = 0, c3_5tap = 0, c4_5tap = 0, offset_5tap = 0, shift_5tap = 0; //sum = 1
c0_6tap = 2, c1_6tap = 1, c2_6tap = 1, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 2, shift_6tap = 2; //sum = 4
break;
case CHROMA_444: //overwrite filter coefficient values for 444
c0_3tap = 1, c1_3tap = 0, c2_3tap = 0, offset_3tap = 0, shift_3tap = 0; //sum = 1
c0_5tap = 0, c1_5tap = 1, c2_5tap = 0, c3_5tap = 0, c4_5tap = 0, offset_5tap = 0, shift_5tap = 0; //sum = 1
c0_6tap = 1, c1_6tap = 0, c2_6tap = 0, c3_6tap = 0, c4_6tap = 0, c5_6tap = 0, offset_6tap = 0, shift_6tap = 0; //sum = 1
break;
default:
break;
}
// ----- above neighbouring row -----
if( bAboveAvaillable )
{
pDst = pDst0 - iDstStride;
int avaiAboveSizes = availlableAboveUnit * chromaUnitWidth;
for (int i = 0; i < avaiAboveSizes; i++)
{
if (isFirstRowOfCtu)
{
// first row of a CTU: only the single luma row directly above is read (3-tap horizontal filter)
piSrc = pRecSrc0 - iRecStride;
// in the boundary case the left tap is replaced by the centre sample
if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + avaiAboveSizes - 1 + logSubWidthC))
{
pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i] * c1_3tap + piSrc[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap;
}
else
{
pDst[i] = (piSrc[mult * i] * c0_3tap + piSrc[mult * i - 1] * c1_3tap + piSrc[mult * i + 1] * c2_3tap + offset_3tap) >> shift_3tap;
}
}
else if( cu.sps->getCclmCollocatedChromaFlag() )
{
// collocated chroma: 5-tap cross-shaped filter
piSrc = pRecSrc0 - iRecStride2;
if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + avaiAboveSizes - 1 + logSubWidthC))
{
pDst[i] = (piSrc[mult * i - strOffset] * c0_5tap
+ piSrc[mult * i] * c1_5tap + piSrc[mult * i] * c2_5tap + piSrc[mult * i + 1] * c3_5tap
+ piSrc[mult * i + strOffset] * c4_5tap
+ offset_5tap) >> shift_5tap;
}
else
{
pDst[i] = (piSrc[mult * i - strOffset] * c0_5tap
+ piSrc[mult * i] * c1_5tap + piSrc[mult * i - 1] * c2_5tap + piSrc[mult * i + 1] * c3_5tap
+ piSrc[mult * i + strOffset] * c4_5tap
+ offset_5tap) >> shift_5tap;
}
}
else
{
// otherwise: 6-tap filter over two luma rows
piSrc = pRecSrc0 - iRecStride2;
if ((i == 0 && !bLeftAvaillable) || (i == uiCWidth + avaiAboveSizes - 1 + logSubWidthC))
{
pDst[i] = ((piSrc[mult * i] * c0_6tap + piSrc[mult * i] * c1_6tap + piSrc[mult * i + 1] * c2_6tap)
+ (piSrc[mult * i + strOffset] * c3_6tap + piSrc[mult * i + strOffset] * c4_6tap + piSrc[mult * i + 1 + strOffset] * c5_6tap)
+ offset_6tap) >> shift_6tap;
}
else
{
pDst[i] = ((piSrc[mult * i] * c0_6tap + piSrc[mult * i - 1] * c1_6tap + piSrc[mult * i + 1] * c2_6tap)
+ (piSrc[mult * i + strOffset] * c3_6tap + piSrc[mult * i - 1 + strOffset] * c4_6tap + piSrc[mult * i + 1 + strOffset] * c5_6tap)
+ offset_6tap) >> shift_6tap;
}
}
}
}
// ----- left neighbouring column -----
if( bLeftAvaillable )
{
pDst = pDst0 - 1;
// piSrc[1] is the filter centre, piSrc[0] / piSrc[2] its horizontal neighbours
piSrc = pRecSrc0 - 2 - logSubWidthC;
int availlableLeftSizes = availlableLeftUnit * chromaUnitHeight;
for (int j = 0; j < availlableLeftSizes; j++)
{
if( cu.sps->getCclmCollocatedChromaFlag() )
{
// in the boundary case the tap of the row above is replaced by the centre sample
if ((j == 0 && !bAboveAvaillable) || (j == uiCHeight + availlableLeftSizes - 1 + logSubWidthC))
{
pDst[0] = ( piSrc[1 ] * c0_5tap
+ piSrc[1 ] * c1_5tap + piSrc[0] * c2_5tap + piSrc[2] * c3_5tap
+ piSrc[1 + strOffset] * c4_5tap
+ offset_5tap ) >> shift_5tap;
}
else
{
pDst[0] = ( piSrc[1 - strOffset] * c0_5tap
+ piSrc[1 ] * c1_5tap + piSrc[0] * c2_5tap + piSrc[2] * c3_5tap
+ piSrc[1 + strOffset] * c4_5tap
+ offset_5tap ) >> shift_5tap;
}
}
else
{
pDst[0] = ((piSrc[1] * c0_6tap + piSrc[0] * c1_6tap + piSrc[2] * c2_6tap)
+ (piSrc[1 + strOffset] * c3_6tap + piSrc[strOffset] * c4_6tap + piSrc[2 + strOffset] * c5_6tap)
+ offset_6tap) >> shift_6tap;
}
piSrc += iRecStride2;
pDst += iDstStride;
}
}
// ----- block interior, collocated-chroma case: 5-tap filter, then return -----
if( cu.sps->getCclmCollocatedChromaFlag() )
{
// TODO: unroll loop
for( int j = 0; j < uiCHeight; j++ )
{
for( int i = 0; i < uiCWidth; i++ )
{
// the four branches differ only in which unavailable tap (left and/or above) is replaced by the centre sample
if( i == 0 && !bLeftAvaillable )
{
if( j == 0 && !bAboveAvaillable )
{
pDst0[i] = (pRecSrc0[mult * i] * c0_5tap
+ pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap
+ pRecSrc0[mult * i + strOffset] * c4_5tap
+ offset_5tap) >> shift_5tap;
}
else
{
pDst0[i] = (pRecSrc0[mult * i - strOffset] * c0_5tap
+ pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap
+ pRecSrc0[mult * i + strOffset] * c4_5tap
+ offset_5tap) >> shift_5tap;
}
}
else if( j == 0 && !bAboveAvaillable )
{
pDst0[i] = (pRecSrc0[mult * i] * c0_5tap
+ pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i - 1] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap
+ pRecSrc0[mult * i + strOffset] * c4_5tap
+ offset_5tap) >> shift_5tap;
}
else
{
pDst0[i] = (pRecSrc0[mult * i - strOffset] * c0_5tap
+ pRecSrc0[mult * i] * c1_5tap + pRecSrc0[mult * i - 1] * c2_5tap + pRecSrc0[mult * i + 1] * c3_5tap
+ pRecSrc0[mult * i + strOffset] * c4_5tap
+ offset_5tap) >> shift_5tap;
}
}
pDst0 += iDstStride;
pRecSrc0 += iRecStride2;
}
return;
}
// ----- block interior, non-collocated case: 6-tap filter via the helper macros below -----
// GET_LUMA_REC_PIX_INC: advances destination and source to the next chroma row.
#define GET_LUMA_REC_PIX_INC \
pDst0 += iDstStride; \
pRecSrc0 += iRecStride2
// GET_LUMA_REC_PIX_OP2: 6-tap filter for chroma column ADDR, reading the luma column to the left (index - 1).
#define GET_LUMA_REC_PIX_OP2(ADDR) \
pDst0[ADDR] = ( pRecSrc0[( (ADDR) << logSubWidthC ) ] * c0_6tap \
+ pRecSrc0[( (ADDR) << logSubWidthC ) + 1] * c1_6tap \
+ pRecSrc0[( (ADDR) << logSubWidthC ) - 1] * c2_6tap \
+ pRecSrc0[( (ADDR) << logSubWidthC ) + iRecStride] * c3_6tap \
+ pRecSrc0[( (ADDR) << logSubWidthC ) + 1 + iRecStride] * c4_6tap \
+ pRecSrc0[( (ADDR) << logSubWidthC ) - 1 + iRecStride] * c5_6tap \
+ offset_6tap ) >> shift_6tap
// GET_LUMA_REC_PIX_OP1: same as OP2, except that for column 0 the left taps are replaced by the
// centre column, so nothing left of the block is read.
#define GET_LUMA_REC_PIX_OP1(ADDR) \
if( !(ADDR) ) \
{ \
pDst0[0] = ( pRecSrc0[0 ] * c0_6tap \
+ pRecSrc0[0 + 1] * c1_6tap \
+ pRecSrc0[0] * c2_6tap \
+ pRecSrc0[0 + iRecStride] * c3_6tap \
+ pRecSrc0[0 + 1 + iRecStride] * c4_6tap \
+ pRecSrc0[0 + iRecStride] * c5_6tap \
+ offset_6tap ) >> shift_6tap; \
} \
else \
{ \
GET_LUMA_REC_PIX_OP2(ADDR); \
}
// width / height are read by SIZE_AWARE_PER_EL_OP, which is defined outside this function;
// presumably it applies the element operation to every column and the INC step once per row - TODO confirm
int width = uiCWidth;
int height = uiCHeight;
if( bLeftAvaillable )
{
if( cu.chromaFormat == CHROMA_420 )
{
// dedicated 4:2:0 routine (declared elsewhere); the commented-out line shows the generic form it stands in for
GetLumaRecPixel420( width, height, pRecSrc0, iRecStride, pDst0, iDstStride );
// SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP2, GET_LUMA_REC_PIX_INC );
}
else //TODO add SIMD for 422,444
{
SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP2, GET_LUMA_REC_PIX_INC );
}
}
else
{
// no left neighbour: column 0 must not read to the left of the block
SIZE_AWARE_PER_EL_OP( GET_LUMA_REC_PIX_OP1, GET_LUMA_REC_PIX_INC );
}
}
#undef GET_LUMA_REC_PIX_INC
#undef GET_LUMA_REC_PIX_OP1
#undef GET_LUMA_REC_PIX_OP2
#undef SIZE_AWARE_PER_EL_OP
void IntraPrediction::xGetLMParameters(const CodingUnit &cu, const ComponentID compID,
const CompArea &chromaArea,
int &a, int &b, int &iShift)
{
CHECK_RECOVERABLE(compID == COMPONENT_Y, "");
const SizeType cWidth = chromaArea.width;
const SizeType cHeight = chromaArea.height;
const Position posLT = chromaArea;
const CodingStructure &cs = *cu.cs;
const SPS & sps = *cs.sps;
const uint32_t tuWidth = chromaArea.width;
const uint32_t tuHeight = chromaArea.height;
const ChromaFormat nChromaFormat = sps.getChromaFormatIdc();
const int baseUnitSize = 1 << MIN_CU_LOG2;
const int unitWidth = baseUnitSize >> getComponentScaleX(chromaArea.compID(), nChromaFormat);
const int unitHeight = baseUnitSize >> getComponentScaleX(chromaArea.compID(), nChromaFormat);
const int tuWidthInUnits = tuWidth / unitWidth;
const int tuHeightInUnits = tuHeight / unitHeight;
const int aboveUnits = tuWidthInUnits;
const int leftUnits = tuHeightInUnits;
int topTemplateSampNum = 2 * cWidth; // for MDLM, the template sample number is 2W or 2H;
int leftTemplateSampNum = 2 * cHeight;
CHECKD( !(m_topRefLength >= topTemplateSampNum), "Error!" );
CHECKD( !(m_leftRefLength >= leftTemplateSampNum), "Error!" );
int totalAboveUnits = (topTemplateSampNum + (unitWidth - 1)) / unitWidth;
int totalLeftUnits = (leftTemplateSampNum + (unitHeight - 1)) / unitHeight;
int aboveRightUnits = totalAboveUnits - aboveUnits;
int leftBelowUnits = totalLeftUnits - leftUnits;
int curChromaMode = cu.intraDir[1];
bool aboveAvailable = 0, leftAvailable = 0;
const TransformUnit& tu = *getTU( cu, chromaArea.pos(), CH_C );
Pel *srcColor0, *curChroma0;
int srcStride, curStride;
PelBuf temp;
if ((curChromaMode == MDLM_L_IDX) || (curChromaMode == MDLM_T_IDX))
{
srcStride = 2 * MAX_TU_SIZE_FOR_PROFILE + 1;
temp = PelBuf(m_piYuvExt[1] + srcStride + 1, srcStride, Size(chromaArea));
}
else
{
srcStride = MAX_TU_SIZE_FOR_PROFILE + 1;
temp = PelBuf(m_piYuvExt[1] + srcStride + 1, srcStride, Size(chromaArea));
}
srcColor0 = temp.bufAt(0, 0);
curChroma0 = getPredictorPtr(compID);
curStride = m_topRefLength + 1;
curChroma0 += curStride + 1;
unsigned internalBitDepth = sps.getBitDepth(CHANNEL_TYPE_CHROMA);
int minLuma[2] = { MAX_INT, 0 };
int maxLuma[2] = { -MAX_INT, 0 };
Pel *src = srcColor0 - srcStride;
Pel *cur = curChroma0 - curStride;
int actualTopTemplateSampNum = 0;
int actualLeftTemplateSampNum = 0;
if( curChromaMode == MDLM_T_IDX )
{
int avaiAboveUnits = 0;
if( tu.cu->above || chromaArea.y > tu.cu->blocks[CH_C].y )
{
avaiAboveUnits = aboveUnits;
aboveRightUnits = aboveRightUnits > ( cHeight / unitWidth ) ? cHeight / unitWidth : aboveRightUnits;
avaiAboveUnits += isAboveAvailable( tu, CHANNEL_TYPE_CHROMA, { posLT.x + ( PosType ) cWidth, posLT.y }, aboveRightUnits, unitWidth );
}
aboveAvailable = avaiAboveUnits >= tuWidthInUnits;
actualTopTemplateSampNum = unitWidth * avaiAboveUnits;
}
else if( curChromaMode == MDLM_L_IDX )
{
int avaiLeftUnits = 0;
if( tu.cu->left || chromaArea.x > tu.cu->blocks[CH_C].x )
{
avaiLeftUnits = leftUnits;
leftBelowUnits = leftBelowUnits > ( cWidth / unitHeight ) ? cWidth / unitHeight : leftBelowUnits;
avaiLeftUnits += isLeftAvailable( tu, CHANNEL_TYPE_CHROMA, { posLT.x, posLT.y + ( PosType ) cHeight }, leftBelowUnits, unitHeight );
}
leftAvailable = avaiLeftUnits >= tuHeightInUnits;
actualLeftTemplateSampNum = unitHeight * avaiLeftUnits;
}
else if( curChromaMode == LM_CHROMA_IDX )
{
aboveAvailable = tu.cu->above || chromaArea.y > tu.cu->blocks[CH_C].y;
leftAvailable = tu.cu->left || chromaArea.x > tu.cu->blocks[CH_C].x;
actualTopTemplateSampNum = cWidth;
actualLeftTemplateSampNum = cHeight;
}
int startPos[2]; //0:Above, 1: Left
int pickStep[2];
int aboveIs4 = leftAvailable ? 0 : 1;
int leftIs4 = aboveAvailable ? 0 : 1;
startPos[0] = actualTopTemplateSampNum >> (2 + aboveIs4);
pickStep[0] = std::max(1, actualTopTemplateSampNum >> (1 + aboveIs4));
startPos[1] = actualLeftTemplateSampNum >> (2 + leftIs4);
pickStep[1] = std::max(1, actualLeftTemplateSampNum >> (1 + leftIs4));
Pel selectLumaPix[4] = { 0, 0, 0, 0 };
Pel selectChromaPix[4] = { 0, 0, 0, 0 };
int cntT, cntL;
cntT = cntL = 0;
int cnt = 0;
if (aboveAvailable)
{
cntT = std::min(actualTopTemplateSampNum, (1 + aboveIs4) << 1);
src = srcColor0 - srcStride;
cur = curChroma0 - curStride;
for (int pos = startPos[0]; cnt < cntT; pos += pickStep[0], cnt++)
{
selectLumaPix[cnt] = src[pos];
selectChromaPix[cnt] = cur[pos];
}
}
if (leftAvailable)
{
cntL = std::min(actualLeftTemplateSampNum, ( 1 + leftIs4 ) << 1 );
src = srcColor0 - 1;
cur = curChroma0 - 1;
for (int pos = startPos[1], cnt = 0; cnt < cntL; pos += pickStep[1], cnt++)
{
selectLumaPix[cnt + cntT] = src[pos * srcStride];
selectChromaPix[cnt+ cntT] = cur[pos * curStride];
}
}
cnt = cntL + cntT;
if (cnt == 2)
{
selectLumaPix[3] = selectLumaPix[0]; selectChromaPix[3] = selectChromaPix[0];
selectLumaPix[2] = selectLumaPix[1]; selectChromaPix[2] = selectChromaPix[1];
selectLumaPix[0] = selectLumaPix[1]; selectChromaPix[0] = selectChromaPix[1];
selectLumaPix[1] = selectLumaPix[3]; selectChromaPix[1] = selectChromaPix[3];
}
int minGrpIdx[2] = { 0, 2 };
int maxGrpIdx[2] = { 1, 3 };
int *tmpMinGrp = minGrpIdx;
int *tmpMaxGrp = maxGrpIdx;
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMinGrp[1]]) std::swap(tmpMinGrp[0], tmpMinGrp[1]);
if (selectLumaPix[tmpMaxGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMaxGrp[0], tmpMaxGrp[1]);
if (selectLumaPix[tmpMinGrp[0]] > selectLumaPix[tmpMaxGrp[1]]) std::swap(tmpMinGrp, tmpMaxGrp); // TODO: really? not std::swap(tmpMinGrp[0], tmpMaxGrp[1]); ?
if (selectLumaPix[tmpMinGrp[1]] > selectLumaPix[tmpMaxGrp[0]]) std::swap(tmpMinGrp[1], tmpMaxGrp[0]);
minLuma[0] = (selectLumaPix [tmpMinGrp[0]] + selectLumaPix [tmpMinGrp[1]] + 1) >> 1;
minLuma[1] = (selectChromaPix[tmpMinGrp[0]] + selectChromaPix[tmpMinGrp[1]] + 1) >> 1;
maxLuma[0] = (selectLumaPix [tmpMaxGrp[0]] + selectLumaPix [tmpMaxGrp[1]] + 1) >> 1;
maxLuma[1] = (selectChromaPix[tmpMaxGrp[0]] + selectChromaPix[tmpMaxGrp[1]] + 1) >> 1;
if (leftAvailable || aboveAvailable)
{
int diff = maxLuma[0] - minLuma[0];
if (diff > 0)
{
int diffC = maxLuma[1] - minLuma[1];
int x = getLog2( diff );
static const uint8_t DivSigTable[1 << 4] = {
// 4bit significands - 8 ( MSB is omitted )
0, 7, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 1, 1, 0
};
int normDiff = (diff << 4 >> x) & 15;
int v = DivSigTable[normDiff] | 8;
x += normDiff != 0;
int y = diffC == 0 ? 0 : getLog2( abs( diffC ) ) + 1;
int add = 1 << y >> 1;
a = (diffC * v + add) >> y;
iShift = 3 + x - y;
if ( iShift < 1 ) {
iShift = 1;
a = ( (a == 0)? 0: (a < 0)? -15 : 15 ); // a=Sign(a)*15
}
b = minLuma[1] - ((a * minLuma[0]) >> iShift);
}
else
{
a = 0;
b = minLuma[1];
iShift = 0;
}
}
else
{
a = 0;
b = 1 << (internalBitDepth - 1);
iShift = 0;
}
}
void IntraPrediction::initIntraMip( const CodingUnit &cu, const CompArea &area )
{
CHECK_RECOVERABLE( area.width > MIP_MAX_WIDTH || area.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" );
// prepare input (boundary) data for prediction
// CHECK( m_ipaParam.refFilterFlag, "ERROR: unfiltered refs expected for MIP" );
Pel *ptrSrc = getPredictorPtr( area.compID() );
const int srcStride = m_topRefLength + 1; //TODO: check this if correct
const int srcHStride = m_leftRefLength + 1;
m_matrixIntraPred.prepareInputForPred( CPelBuf( ptrSrc, srcStride, srcHStride ), area, cu.sps->getBitDepth( toChannelType( area.compID() ) ), area.compID() );
}
void IntraPrediction::predIntraMip( const ComponentID compId, PelBuf &piPred, const CodingUnit &cu )
{
CHECK_RECOVERABLE( piPred.width > MIP_MAX_WIDTH || piPred.height > MIP_MAX_HEIGHT, "Error: block size not supported for MIP" );
CHECK_RECOVERABLE( piPred.width != (1 << getLog2(piPred.width)) || piPred.height != (1 << getLog2(piPred.height)), "Error: expecting blocks of size 2^M x 2^N" );
// generate mode-specific prediction
uint32_t modeIdx = MAX_NUM_MIP_MODE;
bool transposeFlag = false;
if( compId == COMPONENT_Y )
{
modeIdx = cu.intraDir[CHANNEL_TYPE_LUMA];
transposeFlag = cu.mipTransposedFlag();
}
else
{
const CodingUnit &coLocatedLumaPU = PU::getCoLocatedLumaPU(cu);
CHECK_RECOVERABLE(cu.intraDir[CHANNEL_TYPE_CHROMA] != DM_CHROMA_IDX, "Error: MIP is only supported for chroma with DM_CHROMA.");
CHECK_RECOVERABLE(!coLocatedLumaPU.mipFlag(), "Error: Co-located luma CU should use MIP.");
modeIdx = coLocatedLumaPU.intraDir[CHANNEL_TYPE_LUMA];
transposeFlag = coLocatedLumaPU.mipTransposedFlag();
}
CHECK_RECOVERABLE(modeIdx >= getNumModesMip(piPred), "Error: Wrong MIP mode index");
const int bitDepth = cu.sps->getBitDepth( toChannelType( compId ) );
m_matrixIntraPred.predBlock( piPred, modeIdx, piPred, transposeFlag, bitDepth, compId, m_piYuvExt[0] );
}
}
// Static-analysis diagnostics recorded for this file (kept as comments so they are not parsed as code):
// V502 Perhaps the '?:' operator works in a different way than it was expected. The '?:' operator has a lower priority than the '&&' operator.
// V730 Not all members of a class are initialized inside the constructor. Consider inspecting: m_piYuvExt, m_neighborSize, m_lastCUidx, m_topRefLength, m_leftRefLength.
// V557 Array overrun is possible. The value of 'scale' index could reach 31.
// V557 Array overrun is possible. The value of 'scale' index could reach 31.
// V1048 The 'c0_3tap' variable was assigned the same value.
// V1048 The 'c1_3tap' variable was assigned the same value.
// V1048 The 'c2_3tap' variable was assigned the same value.
// V1048 The 'offset_3tap' variable was assigned the same value.
// V1048 The 'shift_3tap' variable was assigned the same value.
// V1048 The 'c0_6tap' variable was assigned the same value.
// V1048 The 'c1_6tap' variable was assigned the same value.
// V1048 The 'c2_6tap' variable was assigned the same value.