daphne-eu · pdamme · Jun 9, 2023 · Jun 6, 2022 · Jul 27, 2022 · Jul 28, 2022
diff --git a/src/runtime/local/kernels/AggAll.h b/src/runtime/local/kernels/AggAll.h
@@ -30,18 +30,18 @@
 // Struct for partial template specialization
 // ****************************************************************************
 
-template<class DT>
+template<typename VTRes, class DTArg>
 struct AggAll {
-    static typename DT::VT apply(AggOpCode opCode, const DT * arg, DCTX(ctx)) = delete;
+    static VTRes apply(AggOpCode opCode, const DTArg * arg, DCTX(ctx)) = delete;
 };
 
 // ****************************************************************************
 // Convenience function
 // ****************************************************************************
 
-template<class DT>
-typename DT::VT aggAll(AggOpCode opCode, const DT * arg, DCTX(ctx)) {
-    return AggAll<DT>::apply(opCode, arg, ctx);
+template<typename VTRes, class DTArg>
+VTRes aggAll(AggOpCode opCode, const DTArg * arg, DCTX(ctx)) {
+    return AggAll<VTRes, DTArg>::apply(opCode, arg, ctx);
 }
 
 // ****************************************************************************
@@ -52,33 +52,33 @@ typename DT::VT aggAll(AggOpCode opCode, const DT * arg, DCTX(ctx)) {
 // scalar <- DenseMatrix
 // ----------------------------------------------------------------------------
 
-template<typename VT>
-struct AggAll<DenseMatrix<VT>> {
-    static VT apply(AggOpCode opCode, const DenseMatrix<VT> * arg, DCTX(ctx)) {
+template<typename VTRes, typename VTArg>
+struct AggAll<VTRes, DenseMatrix<VTArg>> {
+    static VTRes apply(AggOpCode opCode, const DenseMatrix<VTArg> * arg, DCTX(ctx)) {
         const size_t numRows = arg->getNumRows();
         const size_t numCols = arg->getNumCols();
 
-        const VT * valuesArg = arg->getValues();
+        const VTArg * valuesArg = arg->getValues();
 
-        EwBinaryScaFuncPtr<VT, VT, VT> func;
-        VT agg;
+        EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
+        VTRes agg;
         if (AggOpCodeUtils::isPureBinaryReduction(opCode)) {
-            func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
-            agg = AggOpCodeUtils::template getNeutral<VT>(opCode);
+            func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
+            agg = AggOpCodeUtils::template getNeutral<VTRes>(opCode);
         }
         else {
             // TODO Setting the function pointer yields the correct result.
             // However, since MEAN and STDDEV are not sparse-safe, the program
             // does not take the same path for doing the summation, and is less
             // efficient.
             // for MEAN and STDDDEV, we need to sum
-            func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
-            agg = VT(0);
+            func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
+            agg = VTRes(0);
         }
 
         for(size_t r = 0; r < numRows; r++) {
             for(size_t c = 0; c < numCols; c++)
-                agg = func(agg, valuesArg[c], ctx);
+                agg = func(agg, static_cast<VTRes>(valuesArg[c]), ctx);
             valuesArg += arg->getRowSkip();
         }
         if (AggOpCodeUtils::isPureBinaryReduction(opCode))
@@ -99,13 +99,13 @@ struct AggAll<DenseMatrix<VT>> {
 // scalar <- CSRMatrix
 // ----------------------------------------------------------------------------
 
-template<typename VT>
-struct AggAll<CSRMatrix<VT>> {
-    static VT aggArray(const VT * values, size_t numNonZeros, size_t numCells, EwBinaryScaFuncPtr<VT, VT, VT> func, bool isSparseSafe, VT neutral, DCTX(ctx)) {
+template<typename VTRes, typename VTArg>
+struct AggAll<VTRes, CSRMatrix<VTArg>> {
+    static VTRes aggArray(const VTArg * values, size_t numNonZeros, size_t numCells, EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func, bool isSparseSafe, VTRes neutral, DCTX(ctx)) {
         if(numNonZeros) {
-            VT agg = values[0];
+            VTRes agg = static_cast<VTRes>(values[0]);
             for(size_t i = 1; i < numNonZeros; i++)
-                agg = func(agg, values[i], ctx);
+                agg = func(agg, static_cast<VTRes>(values[i]), ctx);
 
             if(!isSparseSafe && numNonZeros < numCells)
                 agg = func(agg, 0, ctx);
@@ -116,30 +116,30 @@ struct AggAll<CSRMatrix<VT>> {
             return func(neutral, 0, ctx);
     }
 
-    static VT apply(AggOpCode opCode, const CSRMatrix<VT> * arg, DCTX(ctx)) {
+    static VTRes apply(AggOpCode opCode, const CSRMatrix<VTArg> * arg, DCTX(ctx)) {
         if(AggOpCodeUtils::isPureBinaryReduction(opCode)) {
 
-            EwBinaryScaFuncPtr<VT, VT, VT> func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
+            EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
 
             return aggArray(
                     arg->getValues(0),
                     arg->getNumNonZeros(),
                     arg->getNumRows() * arg->getNumCols(),
                     func,
                     AggOpCodeUtils::isSparseSafe(opCode),
-                    AggOpCodeUtils::template getNeutral<VT>(opCode),
+                    AggOpCodeUtils::template getNeutral<VTRes>(opCode),
                     ctx
             );
         }
         else { // The op-code is either MEAN or STDDEV.
-            EwBinaryScaFuncPtr<VT, VT, VT> func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));            
+            EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));            
             auto agg = aggArray(
                 arg->getValues(0),
                 arg->getNumNonZeros(),
                 arg->getNumRows() * arg->getNumCols(),
                 func,
                 true,
-                VT(0),
+                VTRes(0),
                 ctx
             );
             if (opCode == AggOpCode::MEAN)

diff --git a/src/runtime/local/kernels/AggCol.h b/src/runtime/local/kernels/AggCol.h
@@ -55,35 +55,37 @@ void aggCol(AggOpCode opCode, DTRes *& res, const DTArg * arg, DCTX(ctx)) {
 // DenseMatrix <- DenseMatrix
 // ----------------------------------------------------------------------------
 
-template<typename VT>
-struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {
-    static void apply(AggOpCode opCode, DenseMatrix<VT> *& res, const DenseMatrix<VT> * arg, DCTX(ctx)) {
+template<typename VTRes, typename VTArg>
+struct AggCol<DenseMatrix<VTRes>, DenseMatrix<VTArg>> {
+    static void apply(AggOpCode opCode, DenseMatrix<VTRes> *& res, const DenseMatrix<VTArg> * arg, DCTX(ctx)) {
         const size_t numRows = arg->getNumRows();
         const size_t numCols = arg->getNumCols();
 
         if(res == nullptr)
-            res = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, false);
+            res = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, false);
 
-        const VT * valuesArg = arg->getValues();
-        VT * valuesRes = res->getValues();
+        const VTArg * valuesArg = arg->getValues();
+        VTRes * valuesRes = res->getValues();
 
-        EwBinaryScaFuncPtr<VT, VT, VT> func;
+        EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
         if(AggOpCodeUtils::isPureBinaryReduction(opCode))
-            func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
+            func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
         else
             // TODO Setting the function pointer yields the correct result.
             // However, since MEAN and STDDEV are not sparse-safe, the program
             // does not take the same path for doing the summation, and is less
             // efficient.
             // for MEAN and STDDDEV, we need to sum
-            func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
+            func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
 
-        memcpy(valuesRes, valuesArg, numCols * sizeof(VT));
-
+        // The first row cannot be memcpy'd into the result, since the argument
+        // value type may differ from the result value type; cast element-wise.
+        for (size_t c = 0; c < numCols; c++)
+            valuesRes[c] = static_cast<VTRes>(valuesArg[c]);
         for(size_t r = 1; r < numRows; r++) {
             valuesArg += arg->getRowSkip();
             for(size_t c = 0; c < numCols; c++)
-                valuesRes[c] = func(valuesRes[c], valuesArg[c], ctx);
+                valuesRes[c] = func(valuesRes[c], static_cast<VTRes>(valuesArg[c]), ctx);
         }
 
         if(AggOpCodeUtils::isPureBinaryReduction(opCode))
@@ -97,13 +99,13 @@ struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {
         if(opCode != AggOpCode::STDDEV)
             return;
 
-        auto tmp = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
-        VT * valuesT = tmp->getValues();
+        auto tmp = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);
+        VTRes * valuesT = tmp->getValues();
         valuesArg = arg->getValues();
 
         for(size_t r = 0; r < numRows; r++) {
             for(size_t c = 0; c < numCols; c++) {
-                VT val = valuesArg[c] - valuesRes[c];
+                VTRes val = static_cast<VTRes>(valuesArg[c]) - valuesRes[c];
                 valuesT[c] = valuesT[c] + val * val;
             }
             valuesArg += arg->getRowSkip();
@@ -116,46 +118,46 @@ struct AggCol<DenseMatrix<VT>, DenseMatrix<VT>> {
 
         // TODO We could avoid copying by returning tmp and destroying res. But
         // that might be wrong if res was not nullptr initially.
-        memcpy(valuesRes, valuesT, numCols * sizeof(VT));
-        DataObjectFactory::destroy<DenseMatrix<VT>>(tmp);
+        memcpy(valuesRes, valuesT, numCols * sizeof(VTRes));
+        DataObjectFactory::destroy<DenseMatrix<VTRes>>(tmp);
     }
 };
 
 // ----------------------------------------------------------------------------
 // DenseMatrix <- CSRMatrix
 // ----------------------------------------------------------------------------
 
-template<typename VT>
-struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
-    static void apply(AggOpCode opCode, DenseMatrix<VT> *& res, const CSRMatrix<VT> * arg, DCTX(ctx)) {
+template<typename VTRes, typename VTArg>
+struct AggCol<DenseMatrix<VTRes>, CSRMatrix<VTArg>> {
+    static void apply(AggOpCode opCode, DenseMatrix<VTRes> *& res, const CSRMatrix<VTArg> * arg, DCTX(ctx)) {
         const size_t numRows = arg->getNumRows();
         const size_t numCols = arg->getNumCols();
 
         if(res == nullptr)
-            res = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
+            res = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);
 
-        VT * valuesRes = res->getValues();
+        VTRes * valuesRes = res->getValues();
 
-        EwBinaryScaFuncPtr<VT, VT, VT> func;
+        EwBinaryScaFuncPtr<VTRes, VTRes, VTRes> func;
         if(AggOpCodeUtils::isPureBinaryReduction(opCode))
-            func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(opCode));
+            func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(opCode));
         else
             // TODO Setting the function pointer yields the correct result.
             // However, since MEAN and STDDEV are not sparse-safe, the program
             // does not take the same path for doing the summation, and is less
             // efficient.
             // for MEAN and STDDDEV, we need to sum
-            func = getEwBinaryScaFuncPtr<VT, VT, VT>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
+            func = getEwBinaryScaFuncPtr<VTRes, VTRes, VTRes>(AggOpCodeUtils::getBinaryOpCode(AggOpCode::SUM));
 
-        const VT * valuesArg = arg->getValues(0);
+        const VTArg * valuesArg = arg->getValues(0);
         const size_t * colIdxsArg = arg->getColIdxs(0);
 
         const size_t numNonZeros = arg->getNumNonZeros();
 
         if(AggOpCodeUtils::isSparseSafe(opCode)) {
             for(size_t i = 0; i < numNonZeros; i++) {
                 const size_t colIdx = colIdxsArg[i];
-                valuesRes[colIdx] = func(valuesRes[colIdx], valuesArg[i], ctx);
+                valuesRes[colIdx] = func(valuesRes[colIdx], static_cast<VTRes>(valuesArg[i]), ctx);
             }
         }
         else {
@@ -164,19 +166,19 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
             const size_t numNonZerosFirstRowArg = arg->getNumNonZeros(0);
             for(size_t i = 0; i < numNonZerosFirstRowArg; i++) {
                 size_t colIdx = colIdxsArg[i];
-                valuesRes[colIdx] = valuesArg[i];
+                valuesRes[colIdx] = static_cast<VTRes>(valuesArg[i]);
                 hist[colIdx]++;
             }
 
             if(arg->getNumRows() > 1) {
                 for(size_t i = numNonZerosFirstRowArg; i < numNonZeros; i++) {
                     const size_t colIdx = colIdxsArg[i];
-                    valuesRes[colIdx] = func(valuesRes[colIdx], valuesArg[i], ctx);
+                    valuesRes[colIdx] = func(valuesRes[colIdx], static_cast<VTRes>(valuesArg[i]), ctx);
                     hist[colIdx]++;
                 }
                 for(size_t c = 0; c < numCols; c++)
                     if(hist[c] < numRows)
-                        valuesRes[c] = func(valuesRes[c], 0, ctx);
+                        valuesRes[c] = func(valuesRes[c], VTRes(0), ctx);
             }
 
             delete[] hist;
@@ -193,13 +195,13 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
         if(opCode != AggOpCode::STDDEV)
             return;
 
-        auto tmp = DataObjectFactory::create<DenseMatrix<VT>>(1, numCols, true);
-        VT * valuesT = tmp->getValues();
+        auto tmp = DataObjectFactory::create<DenseMatrix<VTRes>>(1, numCols, true);
+        VTRes * valuesT = tmp->getValues();
 
         size_t * nnzCol = new size_t[numCols](); // initialized to zeros
         for(size_t i = 0; i < numNonZeros; i++) {
             const size_t colIdx = colIdxsArg[i];
-            VT val = valuesArg[i] - valuesRes[colIdx];
+            VTRes val = static_cast<VTRes>(valuesArg[i]) - valuesRes[colIdx];
             valuesT[colIdx] = valuesT[colIdx] + val * val;
             nnzCol[colIdx]++;
         }
@@ -216,8 +218,8 @@ struct AggCol<DenseMatrix<VT>, CSRMatrix<VT>> {
 
         // TODO We could avoid copying by returning tmp and destroying res. But
         // that might be wrong if res was not nullptr initially.
-        memcpy(valuesRes, valuesT, numCols * sizeof(VT));
-        DataObjectFactory::destroy<DenseMatrix<VT>>(tmp);
+        memcpy(valuesRes, valuesT, numCols * sizeof(VTRes));
+        DataObjectFactory::destroy<DenseMatrix<VTRes>>(tmp);
 
     }
 };