[CIR][CIRGen] Add support for builtin bit operations (#474)

This PR adds CIRGen support for the following built-in bit operations: - `__builtin_ffs{,l,ll,g}` - `__builtin_clz{,l,ll,g}` - `__builtin_ctz{,l,ll,g}` - `__builtin_clrsb{,l,ll,g}` - `__builtin_popcount{,l,ll,g}` - `__builtin_parity{,l,ll,g}` This PR adds a new operation, `cir.bits`, to represent such bit operations on the input integers. LLVMIR lowering support is not included in this PR. > [!NOTE] > As a side note, C++20 adds the `<bit>` header which includes some bit operation functions with similar functionalities to the built-in functions mentioned above. However, these standard library functions have slightly different semantics than the built-in ones and this PR does not include support for these standard library functions. Support for these functions may be added later, or amended into this PR if the reviewers request so. Co-authored-by: Bruno Cardoso Lopes <[email protected]>
llvm · Apr 29, 2024 · d2985ca · d2985ca
1 parent 266b004
commit d2985ca
Show file tree

Hide file tree

Showing 8 changed files with 631 additions and 1 deletion.
diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -985,6 +985,167 @@ def CmpOp : CIR_Op<"cmp", [Pure, SameTypeOperands]> {
   let hasVerifier = 0;
 }
 
+//===----------------------------------------------------------------------===//
+// BitsOp
+//===----------------------------------------------------------------------===//
+
+// Common base class for the `cir.bit.*` operations defined below. Every
+// derived operation is `Pure`, takes a single operand `$input` whose type is
+// constrained by `inputTy`, and produces a single `SInt32` result. All derived
+// operations share one assembly format, e.g.
+// `cir.bit.clz(%0 : !u32i) : !s32i`.
+class CIR_BitOp<string mnemonic, TypeConstraint inputTy>
+    : CIR_Op<mnemonic, [Pure]> {
+  let arguments = (ins inputTy:$input);
+  let results = (outs SInt32:$result);
+
+  let assemblyFormat = [{
+    `(` $input `:` type($input) `)` `:` type($result) attr-dict
+  }];
+}
+
+def BitClrsbOp : CIR_BitOp<"bit.clrsb", SIntOfWidths<[32, 64]>> {
+  let summary = "Get the number of leading redundant sign bits in the input";
+  let description = [{
+    Compute the number of leading redundant sign bits in the input integer.
+
+    The input integer must be a signed integer. The most significant bit of the
+    input integer is the sign bit. The `cir.bit.clrsb` operation returns the
+    number of redundant sign bits in the input, that is, the number of bits
+    following the most significant bit that are identical to it.
+
+    The bit width of the input integer must be either 32 or 64.
+
+    Examples:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+
+    // %0 = 0xDEADBEEF, 0b1101_1110_1010_1101_1011_1110_1110_1111
+    // (that bit pattern is -559038737 as a signed 32-bit integer)
+    %0 = cir.const(#cir.int<-559038737> : !s32i) : !s32i
+    // %1 will be 1 because there is 1 bit following the most significant bit
+    // that is identical to it.
+    %1 = cir.bit.clrsb(%0 : !s32i) : !s32i
+
+    // %2 = 1, 0b0000_0000_0000_0000_0000_0000_0000_0001
+    %2 = cir.const(#cir.int<1> : !s32i) : !s32i
+    // %3 will be 30
+    %3 = cir.bit.clrsb(%2 : !s32i) : !s32i
+    ```
+  }];
+}
+
+// Accepts 16-, 32-, or 64-bit unsigned inputs (per the `UIntOfWidths`
+// constraint in the header); the result type is `SInt32`, as declared by
+// `CIR_BitOp`.
+def BitClzOp : CIR_BitOp<"bit.clz", UIntOfWidths<[16, 32, 64]>> {
+  let summary = "Get the number of leading 0-bits in the input";
+  let description = [{
+    Compute the number of leading 0-bits in the input.
+
+    The input integer must be an unsigned integer. The `cir.bit.clz` operation
+    returns the number of consecutive 0-bits at the most significant bit
+    position in the input.
+
+    This operation invokes undefined behavior if the input value is 0.
+
+    Example:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+    !u32i = !cir.int<u, 32>
+
+    // %0 = 0b0000_0000_0000_0000_0000_0000_0000_1000
+    %0 = cir.const(#cir.int<8> : !u32i) : !u32i
+    // %1 will be 28
+    %1 = cir.bit.clz(%0 : !u32i) : !s32i
+    ```
+  }];
+}
+
+// Accepts 16-, 32-, or 64-bit unsigned inputs (per the `UIntOfWidths`
+// constraint in the header); the result type is `SInt32`, as declared by
+// `CIR_BitOp`.
+def BitCtzOp : CIR_BitOp<"bit.ctz", UIntOfWidths<[16, 32, 64]>> {
+  let summary = "Get the number of trailing 0-bits in the input";
+  let description = [{
+    Compute the number of trailing 0-bits in the input.
+
+    The input integer must be an unsigned integer. The `cir.bit.ctz` operation
+    returns the number of consecutive 0-bits at the least significant bit
+    position in the input.
+
+    This operation invokes undefined behavior if the input value is 0.
+
+    Example:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+    !u32i = !cir.int<u, 32>
+
+    // %0 = 0b1000
+    %0 = cir.const(#cir.int<8> : !u32i) : !u32i
+    // %1 will be 3
+    %1 = cir.bit.ctz(%0 : !u32i) : !s32i
+    ```
+  }];
+}
+
+def BitFfsOp : CIR_BitOp<"bit.ffs", SIntOfWidths<[32, 64]>> {
+  let summary = "Get the position of the least significant 1-bit of input";
+  let description = [{
+    Compute the position of the least significant 1-bit of the input.
+
+    The input integer must be a signed integer. The `cir.bit.ffs` operation
+    returns one plus the index of the least significant 1-bit of the input
+    signed integer. As a special case, if the input integer is 0, `cir.bit.ffs`
+    returns 0.
+
+    Example:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+
+    // %0 = 0b0010_1000
+    %0 = cir.const(#cir.int<40> : !s32i) : !s32i
+    // %1 will be 4 since the 4th least significant bit is 1.
+    %1 = cir.bit.ffs(%0 : !s32i) : !s32i
+    ```
+  }];
+}
+
+def BitParityOp : CIR_BitOp<"bit.parity", UIntOfWidths<[32, 64]>> {
+  let summary = "Get the parity of input";
+  let description = [{
+    Compute the parity of the input. The parity of an integer is the number of
+    1-bits in it modulo 2.
+
+    The input must be an unsigned integer.
+
+    Example:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+    !u32i = !cir.int<u, 32>
+
+    // %0 = 0b0110_1000
+    %0 = cir.const(#cir.int<104> : !u32i) : !u32i
+    // %1 will be 1 since there are 3 1-bits in %0
+    %1 = cir.bit.parity(%0 : !u32i) : !s32i
+    ```
+  }];
+}
+
+def BitPopcountOp : CIR_BitOp<"bit.popcount", UIntOfWidths<[16, 32, 64]>> {
+  let summary = "Get the number of 1-bits in input";
+  let description = [{
+    Compute the number of 1-bits in the input.
+
+    The input must be an unsigned integer.
+
+    Example:
+
+    ```mlir
+    !s32i = !cir.int<s, 32>
+    !u32i = !cir.int<u, 32>
+
+    // %0 = 0b0110_1000
+    %0 = cir.const(#cir.int<104> : !u32i) : !u32i
+    // %1 will be 3 since there are 3 1-bits in %0
+    %1 = cir.bit.popcount(%0 : !u32i) : !s32i
+    ```
+  }];
+}
+
 //===----------------------------------------------------------------------===//
 // SwitchOp
 //===----------------------------------------------------------------------===//

diff --git a/clang/include/clang/CIR/Dialect/IR/CIRTypes.td b/clang/include/clang/CIR/Dialect/IR/CIRTypes.td
@@ -97,6 +97,36 @@ def SInt16 : SInt<16>;
 def SInt32 : SInt<32>;
 def SInt64 : SInt<64>;
 
+// A type constraint that allows unsigned integer type whose width is among the
+// specified list of possible widths.
+//
+// The predicate requires the type to be a `::mlir::cir::IntType`, to report
+// `isUnsigned()`, and to have a `getWidth()` equal to one of `widths`. The
+// generated summary reads like "16-bit or 32-bit uint".
+class UIntOfWidths<list<int> widths>
+  : Type<And<[
+            CPred<"$_self.isa<::mlir::cir::IntType>()">,
+            CPred<"$_self.cast<::mlir::cir::IntType>().isUnsigned()">,
+            Or<!foreach(
+              w, widths,
+              CPred<"$_self.cast<::mlir::cir::IntType>().getWidth() == " # w>
+            )>
+        ]>,
+        !interleave(!foreach(w, widths, w # "-bit"), " or ") # " uint",
+        "::mlir::cir::IntType"
+    > {}
+
+// A type constraint that allows signed integer type whose width is among the
+// specified list of possible widths.
+//
+// The predicate requires the type to be a `::mlir::cir::IntType`, to report
+// `isSigned()`, and to have a `getWidth()` equal to one of `widths`. The
+// generated summary reads like "32-bit or 64-bit sint".
+class SIntOfWidths<list<int> widths>
+  : Type<And<[
+            CPred<"$_self.isa<::mlir::cir::IntType>()">,
+            CPred<"$_self.cast<::mlir::cir::IntType>().isSigned()">,
+            Or<!foreach(
+              w, widths,
+              CPred<"$_self.cast<::mlir::cir::IntType>().getWidth() == " # w>
+            )>
+        ]>,
+        !interleave(!foreach(w, widths, w # "-bit"), " or ") # " sint",
+        "::mlir::cir::IntType"
+    > {}
+
 //===----------------------------------------------------------------------===//
 // FloatType
 //===----------------------------------------------------------------------===//

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -55,6 +55,22 @@ static RValue buildUnaryFPBuiltin(CIRGenFunction &CGF, const CallExpr &E) {
   return RValue::get(Call->getResult(0));
 }
 
+/// Emit the CIR bit operation `Op` for the one-argument builtin call \p E.
+///
+/// The operand is argument 0 of the call. When \p CK holds a value the operand
+/// is emitted through CIRGenFunction::buildCheckedArgForBuiltin with that check
+/// kind; otherwise it is emitted as a plain scalar expression. The result type
+/// of the created operation is the CIR conversion of the call's type.
+template <typename Op>
+static RValue
+buildBuiltinBitOp(CIRGenFunction &CGF, const CallExpr *E,
+                  std::optional<CIRGenFunction::BuiltinCheckKind> CK) {
+  const Expr *operandExpr = E->getArg(0);
+  mlir::Value operand = CK ? CGF.buildCheckedArgForBuiltin(operandExpr, *CK)
+                           : CGF.buildScalarExpr(operandExpr);
+
+  mlir::Type resultTy = CGF.ConvertType(E->getType());
+  mlir::Location loc = CGF.getLoc(E->getExprLoc());
+  auto bitOp = CGF.getBuilder().create<Op>(loc, resultTy, operand);
+  return RValue::get(bitOp);
+}
+
 RValue CIRGenFunction::buildBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
                                         const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -462,7 +478,7 @@ RValue CIRGenFunction::buildBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BImemcpy:
   case Builtin::BI__builtin_memcpy:
   case Builtin::BImempcpy:
-  case Builtin::BI__builtin_mempcpy:
+  case Builtin::BI__builtin_mempcpy: {
     Address Dest = buildPointerWithAlignment(E->getArg(0));
     Address Src = buildPointerWithAlignment(E->getArg(1));
     mlir::Value SizeVal = buildScalarExpr(E->getArg(2));
@@ -480,6 +496,42 @@ RValue CIRGenFunction::buildBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
       return RValue::get(Dest.getPointer());
   }
 
+  case Builtin::BI__builtin_clrsb:
+  case Builtin::BI__builtin_clrsbl:
+  case Builtin::BI__builtin_clrsbll:
+    return buildBuiltinBitOp<mlir::cir::BitClrsbOp>(*this, E, std::nullopt);
+
+  case Builtin::BI__builtin_ctzs:
+  case Builtin::BI__builtin_ctz:
+  case Builtin::BI__builtin_ctzl:
+  case Builtin::BI__builtin_ctzll:
+    return buildBuiltinBitOp<mlir::cir::BitCtzOp>(*this, E, BCK_CTZPassedZero);
+
+  case Builtin::BI__builtin_clzs:
+  case Builtin::BI__builtin_clz:
+  case Builtin::BI__builtin_clzl:
+  case Builtin::BI__builtin_clzll:
+    return buildBuiltinBitOp<mlir::cir::BitClzOp>(*this, E, BCK_CLZPassedZero);
+
+  case Builtin::BI__builtin_ffs:
+  case Builtin::BI__builtin_ffsl:
+  case Builtin::BI__builtin_ffsll:
+    return buildBuiltinBitOp<mlir::cir::BitFfsOp>(*this, E, std::nullopt);
+
+  case Builtin::BI__builtin_parity:
+  case Builtin::BI__builtin_parityl:
+  case Builtin::BI__builtin_parityll:
+    return buildBuiltinBitOp<mlir::cir::BitParityOp>(*this, E, std::nullopt);
+
+  case Builtin::BI__popcnt16:
+  case Builtin::BI__popcnt:
+  case Builtin::BI__popcnt64:
+  case Builtin::BI__builtin_popcount:
+  case Builtin::BI__builtin_popcountl:
+  case Builtin::BI__builtin_popcountll:
+    return buildBuiltinBitOp<mlir::cir::BitPopcountOp>(*this, E, std::nullopt);
+  }
+
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
   // the call using the normal call path, but using the unmangled
   // version of the function name.
@@ -543,6 +595,19 @@ RValue CIRGenFunction::buildBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   return GetUndefRValue(E->getType());
 }
 
+/// Emit the scalar value of builtin argument \p E.
+///
+/// Only the CLZ/CTZ "passed zero" check kinds are accepted. When the builtin
+/// sanitizer is disabled the scalar value is returned as is; the
+/// sanitizer-enabled path is not implemented yet and aborts.
+mlir::Value CIRGenFunction::buildCheckedArgForBuiltin(const Expr *E,
+                                                      BuiltinCheckKind Kind) {
+  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero) &&
+         "Unsupported builtin check kind");
+
+  mlir::Value scalar = buildScalarExpr(E);
+  if (SanOpts.has(SanitizerKind::Builtin)) {
+    // Emitting the runtime check itself is still to be done.
+    assert(!UnimplementedFeature::sanitizerBuiltin());
+    llvm_unreachable("NYI");
+  }
+
+  return scalar;
+}
+
 static mlir::Value buildTargetArchBuiltinExpr(CIRGenFunction *CGF,
                                               unsigned BuiltinID,
                                               const CallExpr *E,

diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -1509,6 +1509,17 @@ class CIRGenFunction : public CIRGenTypeCache {
   LValue buildCheckedLValue(const Expr *E, TypeCheckKind TCK);
   LValue buildMemberExpr(const MemberExpr *E);
 
+  /// Specifies which type of sanitizer check to apply when handling a
+  /// particular builtin.
+  enum BuiltinCheckKind {
+    /// Passed by buildBuiltinExpr when emitting the `__builtin_ctz*` builtins.
+    BCK_CTZPassedZero,
+    /// Passed by buildBuiltinExpr when emitting the `__builtin_clz*` builtins.
+    BCK_CLZPassedZero,
+  };
+
+  /// Emits an argument for a call to a builtin. If the builtin sanitizer is
+  /// enabled, a runtime check specified by \p Kind is also emitted.
+  ///
+  /// NOTE: the definition in CIRGenBuiltin.cpp only accepts BCK_CLZPassedZero
+  /// and BCK_CTZPassedZero, and the sanitizer-enabled path is not implemented
+  /// yet (it ends in llvm_unreachable("NYI")).
+  mlir::Value buildCheckedArgForBuiltin(const Expr *E, BuiltinCheckKind Kind);
+
   /// returns true if aggregate type has a volatile member.
   /// TODO(cir): this could be a common AST helper between LLVM / CIR.
   bool hasVolatileMember(QualType T) {

diff --git a/clang/lib/CIR/CodeGen/UnimplementedFeatureGuarding.h b/clang/lib/CIR/CodeGen/UnimplementedFeatureGuarding.h
@@ -58,6 +58,7 @@ struct UnimplementedFeature {
   static bool pointerOverflowSanitizer() { return false; }
   static bool sanitizeDtor() { return false; }
   static bool sanitizeVLABound() { return false; }
+  static bool sanitizerBuiltin() { return false; }
   static bool sanitizerReturn() { return false; }
 
   // ObjC