Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DAPHNE-#399] Aggregation kernels can return different value types #402

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 26 additions & 26 deletions src/runtime/local/kernels/AggAll.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,18 @@
// Struct for partial template specialization
// ****************************************************************************

template<class DT>
template<typename VTRes, class DTArg>
struct AggAll {
static typename DT::VT apply(AggOpCode opCode, const DT * arg, DCTX(ctx)) = delete;
static VTRes apply(AggOpCode opCode, const DTArg * arg, DCTX(ctx)) = delete;
};

// ****************************************************************************
// Convenience function
// ****************************************************************************

template<class DT>
typename DT::VT aggAll(AggOpCode opCode, const DT * arg, DCTX(ctx)) {
return AggAll<DT>::apply(opCode, arg, ctx);
template<typename VTRes, class DTArg>
VTRes aggAll(AggOpCode opCode, const DTArg * arg, DCTX(ctx)) {
return AggAll<VTRes, DTArg>::apply(opCode, arg, ctx);
}

// ****************************************************************************
Expand All @@ -52,33 +52,33 @@ typename DT::VT aggAll(AggOpCode opCode, const DT * arg, DCTX(ctx)) {
// scalar <- DenseMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggAll<DenseMatrix<VT>> {
static VT apply(AggOpCode opCode, const DenseMatrix<VT> * arg, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggAll<VTRes, DenseMatrix<VTArg>> {
static VTRes apply(AggOpCode opCode, const DenseMatrix<VTArg> * arg, DCTX(ctx)) {
const size_t numRows = arg->getNumRows();
const size_t numCols = arg->getNumCols();

const VT * valuesArg = arg->getValues();
const VTArg * valuesArg = arg->getValues();

EwBinaryScaFuncPtr<VT, VT, VT> func;
VT agg;
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
VTRes agg;
if (AggOpCodeUtils::isPureBinaryReduction(opCode)) {
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
agg = AggOpCodeUtils::template getNeutral<VT>(opCode);
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
agg = AggOpCodeUtils::template getNeutral<VTRes>(opCode);
}
else {
// TODO Setting the function pointer yields the correct result.
// However, since MEAN and STDDEV are not sparse-safe, the program
// does not take the same path for doing the summation, and is less
// efficient.
// For MEAN and STDDEV, we first need to compute the sum.
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
agg = VT(0);
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
agg = VTRes(0);
}

for(size_t r = 0; r < numRows; r++) {
for(size_t c = 0; c < numCols; c++)
agg = func(agg, valuesArg[c], ctx);
agg = func(agg, static_cast<VTRes>(valuesArg[c]), ctx);
valuesArg += arg->getRowSkip();
}
if (AggOpCodeUtils::isPureBinaryReduction(opCode))
Expand All @@ -99,13 +99,13 @@ struct AggAll<DenseMatrix<VT>> {
// scalar <- CSRMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggAll<CSRMatrix<VT>> {
static VT aggArray(const VT * values, size_t numNonZeros, size_t numCells, EwBinaryScaFuncPtr<VT, VT, VT> func, bool isSparseSafe, VT neutral, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggAll<VTRes, CSRMatrix<VTArg>> {
static VTRes aggArray(const VTArg * values, size_t numNonZeros, size_t numCells, EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func, bool isSparseSafe, VTRes neutral, DCTX(ctx)) {
if(numNonZeros) {
VT agg = values[0];
VTRes agg = static_cast<VTRes>(values[0]);
for(size_t i = 1; i < numNonZeros; i++)
agg = func(agg, values[i], ctx);
agg = func(agg, static_cast<VTRes>(values[i]), ctx);

if(!isSparseSafe && numNonZeros < numCells)
agg = func(agg, 0, ctx);
Expand All @@ -116,30 +116,30 @@ struct AggAll<CSRMatrix<VT>> {
return func(neutral, 0, ctx);
}

static VT apply(AggOpCode opCode, const CSRMatrix<VT> * arg, DCTX(ctx)) {
static VTRes apply(AggOpCode opCode, const CSRMatrix<VTArg> * arg, DCTX(ctx)) {
if(AggOpCodeUtils::isPureBinaryReduction(opCode)) {

EwBinaryScaFuncPtr<VT, VT, VT> func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));

return aggArray(
arg->getValues(0),
arg->getNumNonZeros(),
arg->getNumRows() * arg->getNumCols(),
func,
AggOpCodeUtils::isSparseSafe(opCode),
AggOpCodeUtils::template getNeutral<VT>(opCode),
AggOpCodeUtils::template getNeutral<VTRes>(opCode),
ctx
);
}
else { // The op-code is either MEAN or STDDEV.
EwBinaryScaFuncPtr<VT, VT, VT> func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
auto agg = aggArray(
arg->getValues(0),
arg->getNumNonZeros(),
arg->getNumRows() * arg->getNumCols(),
func,
true,
VT(0),
VTRes(0),
ctx
);
if (opCode == AggOpCode::MEAN)
Expand Down
72 changes: 37 additions & 35 deletions src/runtime/local/kernels/AggCol.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,35 +55,37 @@ void aggCol(AggOpCode opCode, DTRes *& res, const DTArg * arg, DCTX(ctx)) {
// DenseMatrix <- DenseMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {
static void apply(AggOpCode opCode, DenseMatrix<VT> *& res, const DenseMatrix<VT> * arg, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggCol<DenseMatrix<VTRes>, DenseMatrix<VTArg>> {
static void apply(AggOpCode opCode, DenseMatrix<VTRes> *& res, const DenseMatrix<VTArg> * arg, DCTX(ctx)) {
const size_t numRows = arg->getNumRows();
const size_t numCols = arg->getNumCols();

if(res == nullptr)
res = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, false);
res = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, false);

const VT * valuesArg = arg->getValues();
VT * valuesRes = res->getValues();
const VTArg * valuesArg = arg->getValues();
VTRes * valuesRes = res->getValues();

EwBinaryScaFuncPtr<VT, VT, VT> func;
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
if(AggOpCodeUtils::isPureBinaryReduction(opCode))
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
else
// TODO Setting the function pointer yields the correct result.
// However, since MEAN and STDDEV are not sparse-safe, the program
// does not take the same path for doing the summation, and is less
// efficient.
// For MEAN and STDDEV, we first need to compute the sum.
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));

memcpy(valuesRes, valuesArg, numCols * sizeof(VT));

// memcpy(valuesRes, valuesArg, numCols * sizeof(VTRes));
// We cannot memcpy here because the result value type (VTRes) may differ
// from the argument value type (VTArg); convert the first row element-wise.
for (size_t c = 0; c < numCols; c++)
valuesRes[c] = static_cast<VTRes>(valuesArg[c]);
for(size_t r = 1; r < numRows; r++) {
valuesArg += arg->getRowSkip();
for(size_t c = 0; c < numCols; c++)
valuesRes[c] = func(valuesRes[c], valuesArg[c], ctx);
valuesRes[c] = func(valuesRes[c], static_cast<VTRes>(valuesArg[c]), ctx);
}

if(AggOpCodeUtils::isPureBinaryReduction(opCode))
Expand All @@ -97,13 +99,13 @@ struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {
if(opCode != AggOpCode::STDDEV)
return;

auto tmp = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
VT * valuesT = tmp->getValues();
auto tmp = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);
VTRes * valuesT = tmp->getValues();
valuesArg = arg->getValues();

for(size_t r = 0; r < numRows; r++) {
for(size_t c = 0; c < numCols; c++) {
VT val = valuesArg[c] - valuesRes[c];
VTRes val = static_cast<VTRes>(valuesArg[c]) - valuesRes[c];
valuesT[c] = valuesT[c] + val * val;
}
valuesArg += arg->getRowSkip();
Expand All @@ -116,46 +118,46 @@ struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {

// TODO We could avoid copying by returning tmp and destroying res. But
// that might be wrong if res was not nullptr initially.
memcpy(valuesRes, valuesT, numCols * sizeof(VT));
DataObjectFactory::destroy<DenseMatrix<VT>>(tmp);
memcpy(valuesRes, valuesT, numCols * sizeof(VTRes));
DataObjectFactory::destroy<DenseMatrix<VTRes>>(tmp);
}
};

// ----------------------------------------------------------------------------
// DenseMatrix <- CSRMatrix
// ----------------------------------------------------------------------------

template<typename VT>
struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
static void apply(AggOpCode opCode, DenseMatrix<VT> *& res, const CSRMatrix<VT> * arg, DCTX(ctx)) {
template<typename VTRes, typename VTArg>
struct AggCol<DenseMatrix<VTRes>, CSRMatrix<VTArg>> {
static void apply(AggOpCode opCode, DenseMatrix<VTRes> *& res, const CSRMatrix<VTArg> * arg, DCTX(ctx)) {
const size_t numRows = arg->getNumRows();
const size_t numCols = arg->getNumCols();

if(res == nullptr)
res = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
res = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);

VT * valuesRes = res->getValues();
VTRes * valuesRes = res->getValues();

EwBinaryScaFuncPtr<VT, VT, VT> func;
EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
if(AggOpCodeUtils::isPureBinaryReduction(opCode))
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
else
// TODO Setting the function pointer yields the correct result.
// However, since MEAN and STDDEV are not sparse-safe, the program
// does not take the same path for doing the summation, and is less
// efficient.
// For MEAN and STDDEV, we first need to compute the sum.
func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));

const VT * valuesArg = arg->getValues(0);
const VTArg * valuesArg = arg->getValues(0);
const size_t * colIdxsArg = arg->getColIdxs(0);

const size_t numNonZeros = arg->getNumNonZeros();

if(AggOpCodeUtils::isSparseSafe(opCode)) {
for(size_t i = 0; i < numNonZeros; i++) {
const size_t colIdx = colIdxsArg[i];
valuesRes[colIdx] = func(valuesRes[colIdx], valuesArg[i], ctx);
valuesRes[colIdx] = func(valuesRes[colIdx], static_cast<VTRes>(valuesArg[i]), ctx);
}
}
else {
Expand All @@ -164,19 +166,19 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
const size_t numNonZerosFirstRowArg = arg->getNumNonZeros(0);
for(size_t i = 0; i < numNonZerosFirstRowArg; i++) {
size_t colIdx = colIdxsArg[i];
valuesRes[colIdx] = valuesArg[i];
valuesRes[colIdx] = static_cast<VTRes>(valuesArg[i]);
hist[colIdx]++;
}

if(arg->getNumRows() > 1) {
for(size_t i = numNonZerosFirstRowArg; i < numNonZeros; i++) {
const size_t colIdx = colIdxsArg[i];
valuesRes[colIdx] = func(valuesRes[colIdx], valuesArg[i], ctx);
valuesRes[colIdx] = func(valuesRes[colIdx], static_cast<VTRes>(valuesArg[i]), ctx);
hist[colIdx]++;
}
for(size_t c = 0; c < numCols; c++)
if(hist[c] < numRows)
valuesRes[c] = func(valuesRes[c], 0, ctx);
valuesRes[c] = func(valuesRes[c], VTRes(0), ctx);
}

delete[] hist;
Expand All @@ -193,13 +195,13 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
if(opCode != AggOpCode::STDDEV)
return;

auto tmp = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
VT * valuesT = tmp->getValues();
auto tmp = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);
VTRes * valuesT = tmp->getValues();

size_t * nnzCol = new size_t[numCols](); // initialized to zeros
for(size_t i = 0; i < numNonZeros; i++) {
const size_t colIdx = colIdxsArg[i];
VT val = valuesArg[i] - valuesRes[colIdx];
VTRes val = static_cast<VTRes>(valuesArg[i]) - valuesRes[colIdx];
valuesT[colIdx] = valuesT[colIdx] + val * val;
nnzCol[colIdx]++;
}
Expand All @@ -216,8 +218,8 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {

// TODO We could avoid copying by returning tmp and destroying res. But
// that might be wrong if res was not nullptr initially.
memcpy(valuesRes, valuesT, numCols * sizeof(VT));
DataObjectFactory::destroy<DenseMatrix<VT>>(tmp);
memcpy(valuesRes, valuesT, numCols * sizeof(VTRes));
DataObjectFactory::destroy<DenseMatrix<VTRes>>(tmp);

}
};
Expand Down
Loading