@@ -334,9 +334,9 @@ static Value downcastSelectAndUpcast(OpBuilder &builder, Location loc,
 ///
 /// Result:
 ///   linearizedMemref = |2|2|3|3| : <4xi2> (<1xi8>)
-static void atomicRMWStore(OpBuilder &builder, Location loc,
-                           MemRefValue linearizedMemref, Value storeIdx,
-                           VectorValue valueToStore, Value mask) {
+static void atomicRMW(OpBuilder &builder, Location loc,
+                      MemRefValue linearizedMemref, Value storeIdx,
+                      VectorValue valueToStore, Value mask) {
   assert(valueToStore.getType().getRank() == 1 && "expected 1-D vector");
 
   // Create an atomic load-modify-write region using
@@ -363,12 +363,11 @@ static void atomicRMWStore(OpBuilder &builder, Location loc,
   builder.create<memref::AtomicYieldOp>(loc, scalarMaskedValue);
 }
 
-/// Generate a non-atomic read-modify-write sequence for subbyte storing.
-/// It has similar logic to `atomicRMWStore`, but without atomicity.
-static void nonAtomicRMWStore(OpBuilder &builder, Location loc,
-                              MemRefValue linearizedMemref,
-                              Value linearizedIndex, VectorValue valueToStore,
-                              Value mask) {
+/// Generate a non-atomic read-modify-write sequence for storing to the emulated
+/// type. It has similar logic to `atomicRMWStore`, but without atomicity.
+static void nonAtomicRMW(OpBuilder &builder, Location loc,
+                         MemRefValue linearizedMemref, Value linearizedIndex,
+                         VectorValue valueToStore, Value mask) {
   assert(valueToStore.getType().getRank() == 1 && "expected 1-D vector");
 
   auto oneElemVecType =
@@ -427,9 +426,9 @@ namespace {
 struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
   using OpConversionPattern::OpConversionPattern;
 
-  ConvertVectorStore(MLIRContext *context, bool useAtomicWrites)
+  ConvertVectorStore(MLIRContext *context, bool disableAtomicRMW)
       : OpConversionPattern<vector::StoreOp>(context),
-        useAtomicWrites_(useAtomicWrites) {}
+        disableAtomicRMW(disableAtomicRMW) {}
 
   LogicalResult
   matchAndRewrite(vector::StoreOp op, OpAdaptor adaptor,
@@ -557,6 +556,8 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
     auto subWidthStoreMaskType =
         VectorType::get({numSrcElemsPerDest}, rewriter.getI1Type());
 
+    auto storeFunc = disableAtomicRMW ? nonAtomicRMW : atomicRMW;
+
     // 1. Partial width store for the leading byte.
     // When the store address is not aligned to emulated width boundary, deal
     // with the unaligned part so that the rest elements are aligned to width
@@ -581,8 +582,6 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
           extractSliceIntoByte(rewriter, loc, valueToStore, 0,
                                frontSubWidthStoreElem, *foldedNumFrontPadElems);
 
-      auto storeFunc = useAtomicWrites_ ? atomicRMWStore : nonAtomicRMWStore;
-
       storeFunc(rewriter, loc, memrefBase, currentDestIndex,
                 cast<VectorValue>(value), frontMask.getResult());
     }
@@ -639,17 +638,16 @@ struct ConvertVectorStore final : OpConversionPattern<vector::StoreOp> {
       auto backMask = rewriter.create<arith::ConstantOp>(
           loc, DenseElementsAttr::get(subWidthStoreMaskType, maskValues));
 
-      subEmulatedWidthStore(rewriter, loc, memrefBase, currentDestIndex,
-                            cast<VectorValue>(subWidthStorePart),
-                            backMask.getResult());
+      storeFunc(rewriter, loc, memrefBase, currentDestIndex,
+                cast<VectorValue>(subWidthStorePart), backMask.getResult());
     }
 
     rewriter.eraseOp(op);
     return success();
   }
 
 private:
-  const bool useAtomicWrites_;
+  const bool disableAtomicRMW;
 };
 
 //===----------------------------------------------------------------------===//
@@ -1962,7 +1960,7 @@ struct RewriteVectorTranspose : OpRewritePattern<vector::TransposeOp> {
 
 void vector::populateVectorNarrowTypeEmulationPatterns(
     const arith::NarrowTypeEmulationConverter &typeConverter,
-    RewritePatternSet &patterns, bool useAtomicWrites) {
+    RewritePatternSet &patterns, bool disableAtomicRMW) {
 
   // Populate `vector.*` conversion patterns.
   // TODO: #119553 support atomicity
@@ -1973,7 +1971,7 @@ void vector::populateVectorNarrowTypeEmulationPatterns(
   // Populate `vector.*` store conversion patterns. The caller can choose
   // to avoid emitting atomic operations and reduce it to load-modify-write
   // sequence for stores if it is known there are no thread contentions.
-  patterns.insert<ConvertVectorStore>(patterns.getContext(), useAtomicWrites);
+  patterns.insert<ConvertVectorStore>(patterns.getContext(), disableAtomicRMW);
 }
 
 void vector::populateVectorNarrowTypeRewritePatterns(
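
To illustrate the new knob, here is a minimal, hypothetical caller that forwards a "no thread contention" flag into the updated populate API. Only `populateVectorNarrowTypeEmulationPatterns`, its `disableAtomicRMW` parameter, and `arith::NarrowTypeEmulationConverter` come from the diff above; the helper function, its name, and the header paths are assumptions for the sketch, not part of this commit.

// Hypothetical wiring of a client-chosen flag into the emulation patterns.
#include "mlir/Dialect/Arith/Transforms/NarrowTypeEmulationConverter.h" // assumed path
#include "mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h"       // assumed path
#include "mlir/IR/PatternMatch.h"

using namespace mlir;

// `disableAtomicRMW` is what the caller passes when it knows there is no
// thread contention on the packed bytes.
static void addNarrowTypeEmulation(arith::NarrowTypeEmulationConverter &converter,
                                   RewritePatternSet &patterns,
                                   bool disableAtomicRMW) {
  // When the flag is true, ConvertVectorStore lowers partial-byte stores with
  // the plain load-modify-write helper (nonAtomicRMW); otherwise it wraps the
  // update in an atomic region yielding via memref::AtomicYieldOp (atomicRMW).
  vector::populateVectorNarrowTypeEmulationPatterns(converter, patterns,
                                                    disableAtomicRMW);
}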