[codegen] Refactor of codegen passes [daphne-eu#889]
This PR substantially reworks codegen for AllAgg* and EwOps and adds
lowering for TransposeOp and Row/ColAgg*. All of these passes are added
to the optional MLIR codegen pipeline, which can be enabled with the
--mlir-codegen flag; they offer an alternative lowering of these operations
to MLIR rather than calls to precompiled C++ kernels. Currently, they
only support DenseMatrix with dimensions that are known at compile time
and any value type (except booleans).

Except for IdxMin and IdxMax, which are lowered directly to affine loops,
and TransposeOp, which lowers to a named Linalg op, all passes make use of
Linalg GenericOps, which are then lowered to affine loops in a later pass
of the codegen pipeline. The passes convert the input DenseMatrix to a
MemRef and create a new MemRef for the output, which is converted back
into a DenseMatrix.

Changes:
- Add codegen for AllAgg*Op, Row/ColAgg*Op, Ew*Op and TransposeOp (see
  below for details)
- Added passes to TableGen files and codegen pipeline
- Added script level test cases / MLIR test cases (using FileCheck)
- Replaced old tests
- Renamed some old test scripts for EwOps for better organization
- Edited fusion.mlir test to lower Linalg to affine loops before
  applying fusion pass
- Added Canonicalization passes for floor, ceil, round that removes the
  respective ops when input type is an integer (this also simplifies
  codegen)
- Added some necessary instantiations in kernels.json
- Restored alphabetic sorting of codegen passes in ir/daphneir/Passes.h

Ops with new codegen:
- AllAgg*Op: Sum, Min, Max
- Row/ColAgg*Op: Sum, Min, Max, IdxMin, IdxMax
- Ew*Op
  - Unary (scalar/matrix): Abs, Sqrt, Exp, Ln, Sin, Cos, Floor, Ceil, Round
  - Binary (scalar-scalar/matrix-matrix/matrix-scalar broadcasting): Add,
    Sub, Mul, Div, Pow, Max, Min
- TransposeOp

Known limitations are listed in the PR description [daphne-eu#889]

Co-authored-by: philipportner [email protected]
AlexRTer authored and philipportner committed Nov 18, 2024
1 parent 287f4c5 commit 576bde3
Showing 62 changed files with 3,225 additions and 675 deletions.
10 changes: 0 additions & 10 deletions daphne-opt/daphne-opt.cpp
@@ -19,24 +19,14 @@
#include <mlir/Dialect/LLVMIR/LLVMDialect.h>

#include "ir/daphneir/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/InitAllDialects.h"
#include "mlir/InitAllPasses.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassManager.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Tools/mlir-opt/MlirOptMain.h"

11 changes: 11 additions & 0 deletions src/compiler/execution/DaphneIrExecutor.cpp
@@ -26,6 +26,7 @@
#include <filesystem>

#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
#include "mlir/Conversion/MathToLLVM/MathToLLVM.h"
#include "mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h"
@@ -39,6 +40,7 @@
#include "mlir/Dialect/Linalg/Passes.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/MemRef/Transforms/Passes.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/ExecutionEngine/ExecutionEngine.h"
#include "mlir/ExecutionEngine/OptUtils.h"
@@ -258,7 +260,9 @@ void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
    pm.addPass(mlir::daphne::createDaphneOptPass());
    pm.addPass(mlir::daphne::createEwOpLoweringPass());
    pm.addPass(mlir::daphne::createAggAllOpLoweringPass());
    pm.addPass(mlir::daphne::createAggDimOpLoweringPass());
    pm.addPass(mlir::daphne::createMapOpLoweringPass());
    pm.addPass(mlir::daphne::createTransposeOpLoweringPass());
    pm.addPass(mlir::createInlinerPass());

    pm.addNestedPass<mlir::func::FuncOp>(mlir::createLoopFusionPass());
@@ -277,6 +281,13 @@ void DaphneIrExecutor::buildCodegenPipeline(mlir::PassManager &pm) {
    pm.addPass(mlir::daphne::createModOpLoweringPass());
    pm.addPass(mlir::createCanonicalizerPass());
    pm.addPass(mlir::createCSEPass());

    pm.addNestedPass<mlir::func::FuncOp>(mlir::createLinalgGeneralizationPass());
    pm.addNestedPass<mlir::func::FuncOp>(mlir::createConvertLinalgToAffineLoopsPass());

    pm.addNestedPass<mlir::func::FuncOp>(mlir::memref::createFoldMemRefAliasOpsPass());
    pm.addPass(mlir::memref::createNormalizeMemRefsPass());

    pm.addNestedPass<mlir::func::FuncOp>(mlir::createAffineScalarReplacementPass());
    pm.addPass(mlir::createLowerAffinePass());
    mlir::LowerVectorToLLVMOptions lowerVectorToLLVMOptions;
324 changes: 175 additions & 149 deletions src/compiler/lowering/AggAllOpLowering.cpp

Large diffs are not rendered by default.

404 changes: 404 additions & 0 deletions src/compiler/lowering/AggDimOpLowering.cpp

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions src/compiler/lowering/CMakeLists.txt
@@ -32,6 +32,8 @@ add_mlir_dialect_library(MLIRDaphneTransforms
    MapOpLowering.cpp
    MatMulOpLowering.cpp
    AggAllOpLowering.cpp
    AggDimOpLowering.cpp
    TransposeOpLowering.cpp

    DEPENDS
    MLIRDaphneOpsIncGen
@@ -44,6 +46,7 @@ find_library(HWLOC_LIB NAMES libhwloc.so HINTS ${PROJECT_BINARY_DIR}/installed/l

target_link_libraries(MLIRDaphneTransforms PUBLIC
    CompilerUtils
    Util
    MLIRSCFToControlFlow
    MLIRArithToLLVM
    MLIRMemRefToLLVM
545 changes: 389 additions & 156 deletions src/compiler/lowering/EwOpsLowering.cpp

Large diffs are not rendered by default.

156 changes: 156 additions & 0 deletions src/compiler/lowering/TransposeOpLowering.cpp
@@ -0,0 +1,156 @@
/*
* Copyright 2024 The DAPHNE Consortium
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "compiler/utils/LoweringUtils.h"
#include "ir/daphneir/Daphne.h"
#include "ir/daphneir/Passes.h"

#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/ArithToLLVM/ArithToLLVM.h"
#include "mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h"
#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVM.h"
#include "mlir/Conversion/LLVMCommon/LoweringOptions.h"
#include "mlir/Conversion/LLVMCommon/TypeConverter.h"
#include "mlir/Dialect/Func/Transforms/FuncConversions.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Utils/StructuredOpsUtils.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "mlir/IR/AffineExpr.h"
#include "mlir/IR/AffineMap.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/BuiltinDialect.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Location.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/IR/OpDefinition.h"
#include "mlir/IR/Operation.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/IR/UseDefLists.h"
#include "mlir/IR/Value.h"
#include "mlir/IR/ValueRange.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Support/LogicalResult.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

class TransposeOpLowering : public OpConversionPattern<daphne::TransposeOp> {
  public:
    using OpConversionPattern::OpConversionPattern;

    explicit TransposeOpLowering(TypeConverter &typeConverter, MLIRContext *ctx)
        : mlir::OpConversionPattern<daphne::TransposeOp>(typeConverter, ctx, PatternBenefit(1)) {
        this->setDebugName("TransposeOpLowering");
    }

    /**
     * @brief Replaces a Transpose operation with a Linalg TransposeOp if possible.
     *
     * @return mlir::success if Transpose has been replaced, else mlir::failure.
     */
    LogicalResult matchAndRewrite(daphne::TransposeOp op, OpAdaptor adaptor,
                                  ConversionPatternRewriter &rewriter) const override {

        daphne::MatrixType matrixType = adaptor.getArg().getType().dyn_cast<daphne::MatrixType>();
        if (!matrixType) {
            return failure();
        }

        Location loc = op->getLoc();

        Type matrixElementType = matrixType.getElementType();
        ssize_t numRows = matrixType.getNumRows();
        ssize_t numCols = matrixType.getNumCols();

        if (numRows < 0 || numCols < 0) {
            return rewriter.notifyMatchFailure(
                op, "transposeOp codegen currently only works with matrix dimensions that are known at compile time");
        }

        Value argMemref = rewriter.create<daphne::ConvertDenseMatrixToMemRef>(
            loc, MemRefType::get({numRows, numCols}, matrixElementType), adaptor.getArg());

        Value resMemref =
            rewriter.create<memref::AllocOp>(loc, MemRefType::get({numCols, numRows}, matrixElementType));

        DenseI64ArrayAttr permutation = rewriter.getDenseI64ArrayAttr({1, 0});
        rewriter.create<linalg::TransposeOp>(loc, argMemref, resMemref, permutation);

        Value resDenseMatrix = convertMemRefToDenseMatrix(loc, rewriter, resMemref, op.getType());

        rewriter.replaceOp(op, resDenseMatrix);

        return success();
    }
};

namespace {
/**
 * @brief Lowers the daphne::Transpose operator to a Linalg TransposeOp.
 *
 * This rewrite may enable loop fusion on the affine loops TransposeOp is
 * lowered to by running the loop fusion pass.
 */
struct TransposeLoweringPass : public mlir::PassWrapper<TransposeLoweringPass, mlir::OperationPass<mlir::ModuleOp>> {
    explicit TransposeLoweringPass() {}

    StringRef getArgument() const final { return "lower-transpose"; }
    StringRef getDescription() const final { return "Lowers Transpose operators to a Linalg TransposeOp."; }

    void getDependentDialects(mlir::DialectRegistry &registry) const override {
        registry.insert<mlir::LLVM::LLVMDialect, mlir::linalg::LinalgDialect, mlir::memref::MemRefDialect>();
    }
    void runOnOperation() final;
};
} // end anonymous namespace

void TransposeLoweringPass::runOnOperation() {
    mlir::ConversionTarget target(getContext());
    mlir::RewritePatternSet patterns(&getContext());
    LowerToLLVMOptions llvmOptions(&getContext());
    LLVMTypeConverter typeConverter(&getContext(), llvmOptions);

    typeConverter.addConversion(convertInteger);
    typeConverter.addConversion(convertFloat);
    typeConverter.addConversion([](Type type) { return type; });
    typeConverter.addArgumentMaterialization(materializeCastFromIllegal);
    typeConverter.addSourceMaterialization(materializeCastToIllegal);
    typeConverter.addTargetMaterialization(materializeCastFromIllegal);

    target.addLegalDialect<BuiltinDialect, daphne::DaphneDialect, linalg::LinalgDialect, memref::MemRefDialect>();

    target.addDynamicallyLegalOp<daphne::TransposeOp>([](Operation *op) {
        Type operand = op->getOperand(0).getType();
        daphne::MatrixType matType = operand.dyn_cast<daphne::MatrixType>();
        if (matType && matType.getRepresentation() == daphne::MatrixRepresentation::Dense) {
            return false;
        }
        return true;
    });

    patterns.insert<TransposeOpLowering>(typeConverter, &getContext());
    auto module = getOperation();
    if (failed(applyPartialConversion(module, target, std::move(patterns)))) {
        signalPassFailure();
    }
}

std::unique_ptr<mlir::Pass> daphne::createTransposeOpLoweringPass() {
    return std::make_unique<TransposeLoweringPass>();
}
7 changes: 6 additions & 1 deletion src/compiler/utils/LoweringUtils.cpp
@@ -25,7 +25,6 @@
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/TypeUtilities.h"
#include "mlir/IR/Value.h"
#include "mlir/Transforms/Passes.h"

/// Insert an allocation for the given MemRefType.
mlir::Value insertMemRefAlloc(mlir::MemRefType type, mlir::Location loc, mlir::PatternRewriter &rewriter) {
@@ -131,3 +130,9 @@ mlir::Operation *findLastUseOfSSAValue(mlir::Value &v) {

    return lastUseOp;
}

mlir::Value convertToSignlessInt(mlir::OpBuilder &rewriter, mlir::Location loc, mlir::TypeConverter *typeConverter,
                                 mlir::Value origVal, mlir::Type targetType) {
    return typeConverter->materializeTargetConversion(
        rewriter, loc, rewriter.getIntegerType(targetType.getIntOrFloatBitWidth()), origVal);
}
3 changes: 3 additions & 0 deletions src/compiler/utils/LoweringUtils.h
@@ -54,3 +54,6 @@ mlir::Type convertFloat(mlir::FloatType floatType);
mlir::Type convertInteger(mlir::IntegerType intType);

mlir::Operation *findLastUseOfSSAValue(mlir::Value &v);

mlir::Value convertToSignlessInt(mlir::OpBuilder &rewriter, mlir::Location loc, mlir::TypeConverter *typeConverter,
                                 mlir::Value origVal, mlir::Type targetType);
60 changes: 60 additions & 0 deletions src/ir/daphneir/Canonicalize.cpp
@@ -512,6 +512,66 @@ mlir::LogicalResult mlir::daphne::ConvertMemRefToDenseMatrix::canonicalize(mlir:
return mlir::success();
}

/**
 * @brief Replaces `floor(a)` with `a` if `a` is an integer
 * or a matrix of integers.
 *
 * @param op
 * @param rewriter
 * @return
 */
mlir::LogicalResult mlir::daphne::EwFloorOp::canonicalize(mlir::daphne::EwFloorOp op, mlir::PatternRewriter &rewriter) {
    mlir::Value operand = op.getOperand();
    auto matrix = operand.getType().dyn_cast<mlir::daphne::MatrixType>();
    mlir::Type elemType = matrix ? matrix.getElementType() : operand.getType();

    if (llvm::isa<mlir::IntegerType>(elemType)) {
        rewriter.replaceOp(op, operand);
        return mlir::success();
    }
    return mlir::failure();
}

/**
 * @brief Replaces `ceil(a)` with `a` if `a` is an integer
 * or a matrix of integers.
 *
 * @param op
 * @param rewriter
 * @return
 */
mlir::LogicalResult mlir::daphne::EwCeilOp::canonicalize(mlir::daphne::EwCeilOp op, mlir::PatternRewriter &rewriter) {
    mlir::Value operand = op.getOperand();
    auto matrix = operand.getType().dyn_cast<mlir::daphne::MatrixType>();
    mlir::Type elemType = matrix ? matrix.getElementType() : operand.getType();

    if (llvm::isa<mlir::IntegerType>(elemType)) {
        rewriter.replaceOp(op, operand);
        return mlir::success();
    }
    return mlir::failure();
}

/**
 * @brief Replaces `round(a)` with `a` if `a` is an integer
 * or a matrix of integers.
 *
 * @param op
 * @param rewriter
 * @return
 */
mlir::LogicalResult mlir::daphne::EwRoundOp::canonicalize(mlir::daphne::EwRoundOp op, mlir::PatternRewriter &rewriter) {
    mlir::Value operand = op.getOperand();
    auto matrix = operand.getType().dyn_cast<mlir::daphne::MatrixType>();
    mlir::Type elemType = matrix ? matrix.getElementType() : operand.getType();

    if (llvm::isa<mlir::IntegerType>(elemType)) {
        rewriter.replaceOp(op, operand);
        return mlir::success();
    }
    return mlir::failure();
}

mlir::LogicalResult mlir::daphne::RenameOp::canonicalize(mlir::daphne::RenameOp op, mlir::PatternRewriter &rewriter) {
    // Replace the RenameOp by its argument, since we only need
    // this operation during DaphneDSL parsing.
12 changes: 9 additions & 3 deletions src/ir/daphneir/DaphneOps.td
@@ -241,9 +241,15 @@ def Daphne_EwNegOp : Daphne_EwUnaryOp<"ewNeg", NumScalar, [ValueTypeFromFirstArg]>
// Rounding
// ----------------------------------------------------------------------------

def Daphne_EwRoundOp : Daphne_EwUnaryOp<"ewRound", NumScalar, [ValueTypeFromFirstArg]>;
def Daphne_EwFloorOp : Daphne_EwUnaryOp<"ewFloor", NumScalar, [ValueTypeFromFirstArg]>;
def Daphne_EwCeilOp : Daphne_EwUnaryOp<"ewCeil", NumScalar, [ValueTypeFromFirstArg]>;
def Daphne_EwRoundOp : Daphne_EwUnaryOp<"ewRound", NumScalar, [ValueTypeFromFirstArg]> {
    let hasCanonicalizeMethod = 1;
}
def Daphne_EwFloorOp : Daphne_EwUnaryOp<"ewFloor", NumScalar, [ValueTypeFromFirstArg]> {
    let hasCanonicalizeMethod = 1;
}
def Daphne_EwCeilOp : Daphne_EwUnaryOp<"ewCeil", NumScalar, [ValueTypeFromFirstArg]> {
    let hasCanonicalizeMethod = 1;
}

// ----------------------------------------------------------------------------
// Trigonometric
16 changes: 9 additions & 7 deletions src/ir/daphneir/Passes.h
@@ -41,32 +41,34 @@ struct InferenceConfig {

// alphabetically sorted list of passes
std::unique_ptr<Pass> createAdaptTypesToKernelsPass();
std::unique_ptr<Pass> createAggAllOpLoweringPass();
std::unique_ptr<Pass> createAggDimOpLoweringPass();
std::unique_ptr<Pass> createDaphneOptPass();
std::unique_ptr<Pass> createDistributeComputationsPass();
std::unique_ptr<Pass> createDistributePipelinesPass();
std::unique_ptr<Pass> createMapOpLoweringPass();
std::unique_ptr<Pass> createEwOpLoweringPass();
std::unique_ptr<Pass> createModOpLoweringPass();
std::unique_ptr<Pass> createInferencePass(InferenceConfig cfg = {false, true, true, true, true});
std::unique_ptr<Pass> createInsertDaphneContextPass(const DaphneUserConfig &cfg);
std::unique_ptr<Pass> createDaphneOptPass();
std::unique_ptr<Pass> createLowerToLLVMPass(const DaphneUserConfig &cfg);
std::unique_ptr<Pass> createManageObjRefsPass();
std::unique_ptr<Pass> createMapOpLoweringPass();
std::unique_ptr<OperationPass<ModuleOp>>
createMatMulOpLoweringPass(bool matmul_tile, int matmul_vec_size_bits = 0,
                           std::vector<unsigned> matmul_fixed_tile_sizes = {}, bool matmul_use_fixed_tile_sizes = false,
                           int matmul_unroll_factor = 1, int matmul_unroll_jam_factor = 4,
                           int matmul_num_vec_registers = 16, bool matmul_invert_loops = false);
std::unique_ptr<OperationPass<ModuleOp>> createMatMulOpLoweringPass();
std::unique_ptr<Pass> createAggAllOpLoweringPass();
std::unique_ptr<Pass> createMemRefTestPass();
std::unique_ptr<Pass> createProfilingPass();
std::unique_ptr<Pass> createLowerToLLVMPass(const DaphneUserConfig &cfg);
std::unique_ptr<Pass> createManageObjRefsPass();
std::unique_ptr<Pass> createModOpLoweringPass();
std::unique_ptr<Pass> createPhyOperatorSelectionPass();
std::unique_ptr<Pass> createPrintIRPass(std::string message = "");
std::unique_ptr<Pass> createProfilingPass();
std::unique_ptr<Pass> createRewriteSqlOpPass();
std::unique_ptr<Pass> createRewriteToCallKernelOpPass(const DaphneUserConfig &cfg,
                                                      std::unordered_map<std::string, bool> &usedLibPaths);
std::unique_ptr<Pass> createSelectMatrixRepresentationsPass(const DaphneUserConfig &cfg);
std::unique_ptr<Pass> createSpecializeGenericFunctionsPass(const DaphneUserConfig &cfg);
std::unique_ptr<Pass> createTransposeOpLoweringPass();
std::unique_ptr<Pass> createVectorizeComputationsPass();
#ifdef USE_CUDA
std::unique_ptr<Pass> createMarkCUDAOpsPass(const DaphneUserConfig &cfg);
