From 642531228f052ee14cdab33b1b2571c98f032b3e Mon Sep 17 00:00:00 2001
From: Rajveer
Date: Tue, 28 Jan 2025 16:52:09 +0530
Subject: [PATCH] [CIR][CIRGen] Support for builtin __atomic_thread_fence

Resolves #1274

Emit the new `cir.atomic.fence` op for the `__atomic_thread_fence` and
`__atomic_signal_fence` builtins. The `__c11_atomic_*_fence` builtins and the
lowering of the op to LLVM are not implemented yet.
---
Review notes (ignored by `git am`):
 - MakeAtomicFenceValue no longer calls getPointeeType() on the memory-order
   argument (an integer, not a pointer) and now reports NYI instead of
   dereferencing a null defining op when the ordering is not a constant.
 - The op description no longer claims the __c11 builtins are handled.
 - The test gained CHECK lines; the previous version had none, so the active
   FileCheck RUN line could not pass.
 - Blob hashes and exact context whitespace were not regenerated; re-run
   `git format-patch` from the branch before sending.

 clang/include/clang/CIR/Dialect/IR/CIROps.td  | 41 ++++++++++++++++
 clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp       | 34 ++++++++++++-
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp |  9 ++++
 .../CIR/Lowering/DirectToLLVM/LowerToLLVM.h   | 11 +++++
 clang/test/CIR/CodeGen/atomic-thread-fence.c  | 52 +++++++++++++++++++++
 5 files changed, 146 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CIR/CodeGen/atomic-thread-fence.c

diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 2659471e7e38..79fa38784932 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -5409,6 +5409,47 @@ def AtomicCmpXchg : CIR_Op<"atomic.cmp_xchg",
   let hasVerifier = 0;
 }
 
+def MemScope_SingleThread : I32EnumAttrCase<"MemScope_SingleThread",
+                                            0, "single_thread">;
+def MemScope_System : I32EnumAttrCase<"MemScope_System",
+                                      1, "system">;
+
+def MemScopeKind : I32EnumAttr<
+    "MemScopeKind",
+    "Memory Scope Enumeration",
+    [MemScope_SingleThread, MemScope_System]> {
+  let cppNamespace = "::cir";
+}
+
+def AtomicFence : CIR_Op<"atomic.fence"> {
+  let summary = "Atomic thread fence";
+  let description = [{
+    C/C++ atomic fence synchronization primitive. Enforces memory ordering
+    constraints within the given synchronization scope: `system` is used for
+    `__atomic_thread_fence` and `single_thread` for `__atomic_signal_fence`.
+
+    Currently emitted for:
+      - `__atomic_thread_fence`
+      - `__atomic_signal_fence`
+
+    The `__c11_atomic_*_fence` builtins are not emitted as this op yet.
+
+    Example:
+
+      cir.atomic.fence(sync_scope = system, ordering = seq_cst)
+  }];
+  let results = (outs);
+  let arguments = (ins Arg<MemScopeKind, "sync scope">:$sync_scope,
+                       Arg<MemOrder, "memory order">:$ordering);
+
+  let assemblyFormat = [{
+    `(` `sync_scope` `=` $sync_scope `,`
+    `ordering` `=` $ordering `)` attr-dict
+  }];
+
+  let hasVerifier = 0;
+}
+
 def SignBitOp : CIR_Op<"signbit", [Pure]> {
   let summary = "Checks the sign of a floating-point number";
   let description = [{
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index bb285e4811e3..cd6c599da39c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -16,7 +16,10 @@
 #include "CIRGenCstEmitter.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "CIRGenValue.h"
 #include "TargetInfo.h"
+#include "clang/AST/Expr.h"
+#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
 #include "clang/CIR/MissingFeatures.h"
 
 // TODO(cir): we shouldn't need this but we currently reuse intrinsic IDs for
@@ -30,6 +33,7 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Value.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -330,6 +334,30 @@ static mlir::Value MakeAtomicCmpXchgValue(CIRGenFunction &cgf,
   return returnBool ? op.getResult(1) : op.getResult(0);
 }
 
+// Emit a cir.atomic.fence for a fence builtin whose memory-order argument
+// folds to an integer constant; non-constant orderings are NYI.
+static mlir::Value MakeAtomicFenceValue(CIRGenFunction &cgf,
+                                        const CallExpr *expr,
+                                        cir::MemScopeKind syncScope) {
+  // Argument 0 is the memory order itself (an int), not a pointer.
+  mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));
+
+  auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>();
+  if (!constOrdering)
+    llvm_unreachable("NYI: variable ordering not supported");
+
+  auto orderingAttr = mlir::dyn_cast<cir::IntAttr>(constOrdering.getValue());
+  if (!orderingAttr)
+    llvm_unreachable("NYI: non-integer ordering not supported");
+
+  auto ordering = static_cast<cir::MemOrder>(orderingAttr.getUInt());
+  cgf.getBuilder().create<cir::AtomicFence>(
+      cgf.getLoc(expr->getSourceRange()), syncScope, ordering);
+
+  // The fence builtins return void, so there is no result value.
+  return mlir::Value();
+}
+
 static bool
 typeRequiresBuiltinLaunderImp(const ASTContext &astContext, QualType ty,
                               llvm::SmallPtrSetImpl<const Decl *> &seen) {
@@ -1840,10 +1868,14 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm_unreachable("BI__atomic_clear NYI");
 
   case Builtin::BI__atomic_thread_fence:
+    return RValue::get(
+        MakeAtomicFenceValue(*this, E, cir::MemScopeKind::MemScope_System));
   case Builtin::BI__atomic_signal_fence:
+    return RValue::get(MakeAtomicFenceValue(
+        *this, E, cir::MemScopeKind::MemScope_SingleThread));
   case Builtin::BI__c11_atomic_thread_fence:
   case Builtin::BI__c11_atomic_signal_fence:
-    llvm_unreachable("BI__atomic_thread_fence like NYI");
+    llvm_unreachable("BI__c11_atomic_thread_fence like NYI");
 
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 3462e90ff5f4..a0522fb65082 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -18,6 +18,7 @@
 #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
 #include "mlir/Dialect/LLVMIR/Transforms/Passes.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
@@ -41,6 +42,8 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
 #include "clang/CIR/Dialect/Passes.h"
 #include "clang/CIR/LoweringHelpers.h"
 #include "clang/CIR/MissingFeatures.h"
@@ -3399,6 +3402,12 @@ mlir::LogicalResult CIRToLLVMAtomicFetchLowering::matchAndRewrite(
   return mlir::success();
 }
 
+// mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
+//     cir::AtomicFence op, OpAdaptor adaptor,
+//     mlir::ConversionPatternRewriter &rewriter) const {
+//   return mlir::success();
+// }
+
 mlir::LogicalResult CIRToLLVMByteswapOpLowering::matchAndRewrite(
     cir::ByteswapOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 104ce3a0b105..3b41b59f8d5b 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -16,6 +16,7 @@
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/Interfaces/DataLayoutInterfaces.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
 
 namespace cir {
 namespace direct {
@@ -820,6 +821,16 @@ class CIRToLLVMAtomicFetchLowering
                   mlir::ConversionPatternRewriter &) const override;
 };
 
+// class CIRToLLVMAtomicFenceLowering
+//     : public mlir::OpConversionPattern<cir::AtomicFence> {
+// public:
+//   using mlir::OpConversionPattern<cir::AtomicFence>::OpConversionPattern;
+//
+//   mlir::LogicalResult
+//   matchAndRewrite(cir::AtomicFence op, OpAdaptor,
+//                   mlir::ConversionPatternRewriter &) const override;
+// };
+
 class CIRToLLVMByteswapOpLowering
     : public mlir::OpConversionPattern<cir::ByteswapOp> {
 public:
diff --git a/clang/test/CIR/CodeGen/atomic-thread-fence.c b/clang/test/CIR/CodeGen/atomic-thread-fence.c
new file mode 100644
index 000000000000..b2183132836a
--- /dev/null
+++ b/clang/test/CIR/CodeGen/atomic-thread-fence.c
@@ -0,0 +1,52 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+// TODO(cir): Enable the LLVM RUN lines once cir.atomic.fence is lowered.
+// RUN-DISABLED: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN-DISABLED: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+struct Data {
+  int value;
+  void *ptr;
+};
+
+typedef struct Data *DataPtr;
+
+void applyThreadFence() {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+// CHECK-LABEL: @applyThreadFence
+// CHECK: cir.atomic.fence{{.*}}sync_scope = system
+
+void applySignalFence() {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+}
+// CHECK-LABEL: @applySignalFence
+// CHECK: cir.atomic.fence{{.*}}sync_scope = single_thread
+
+void modifyWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  d->value = 42;
+}
+// CHECK-LABEL: @modifyWithThreadFence
+// CHECK: cir.atomic.fence{{.*}}sync_scope = system
+
+void modifyWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  d->value = 24;
+}
+// CHECK-LABEL: @modifyWithSignalFence
+// CHECK: cir.atomic.fence{{.*}}sync_scope = single_thread
+
+void loadWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+}
+// CHECK-LABEL: @loadWithThreadFence
+// CHECK: cir.atomic.fence{{.*}}sync_scope = system
+
+void loadWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+}
+// CHECK-LABEL: @loadWithSignalFence
+// CHECK: cir.atomic.fence{{.*}}sync_scope = single_thread