diff --git a/clang/include/clang/CIR/Dialect/IR/CIROps.td b/clang/include/clang/CIR/Dialect/IR/CIROps.td
index 7994cd282d83..523222085db4 100644
--- a/clang/include/clang/CIR/Dialect/IR/CIROps.td
+++ b/clang/include/clang/CIR/Dialect/IR/CIROps.td
@@ -5411,6 +5411,49 @@ def AtomicCmpXchg : CIR_Op<"atomic.cmp_xchg",
   let hasVerifier = 0;
 }
 
+def MemScope_SingleThread : I32EnumAttrCase<"MemScope_SingleThread",
+                                            0, "single_thread">;
+def MemScope_System : I32EnumAttrCase<"MemScope_System",
+                                      1, "system">;
+
+def MemScopeKind : I32EnumAttr<
+    "MemScopeKind",
+    "Memory Scope Enumeration",
+    [MemScope_SingleThread, MemScope_System]> {
+  let cppNamespace = "::cir";
+}
+
+def AtomicFence : CIR_Op<"atomic.fence"> {
+  let summary = "Atomic thread fence";
+  let description = [{
+    C/C++ Atomic thread fence synchronization primitive. Implements the builtin
+    `__atomic_thread_fence` which enforces memory ordering constraints across
+    threads within the specified synchronization scope.
+
+    This handles all variations including:
+    - `__atomic_thread_fence`
+    - `__atomic_signal_fence`
+    - `__c11_atomic_thread_fence`
+    - `__c11_atomic_signal_fence`
+
+    Example:
+    ```mlir
+    cir.atomic.fence system seq_cst
+    cir.atomic.fence single_thread seq_cst
+    ```
+
+  }];
+  let results = (outs);
+  let arguments = (ins Arg<MemScopeKind, "sync scope">:$sync_scope,
+                       Arg<MemOrder, "memory order">:$ordering);
+
+  let assemblyFormat = [{
+    $sync_scope $ordering attr-dict
+  }];
+
+  let hasVerifier = 0;
+}
+
 def SignBitOp : CIR_Op<"signbit", [Pure]> {
   let summary = "Checks the sign of a floating-point number";
   let description = [{
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
index b3807cba5828..a5f899320a86 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
@@ -16,7 +16,11 @@
 #include "CIRGenCstEmitter.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "CIRGenValue.h"
 #include "TargetInfo.h"
+#include "clang/AST/Expr.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
+#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
 #include "clang/CIR/MissingFeatures.h"
 
 // TODO(cir): we shouldn't need this but we currently reuse intrinsic IDs for
@@ -30,7 +34,9 @@
 #include "clang/Frontend/FrontendDiagnostic.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Value.h"
+#include "mlir/Support/LLVM.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -333,6 +339,30 @@ static mlir::Value MakeAtomicCmpXchgValue(CIRGenFunction &cgf,
   return returnBool ? op.getResult(1) : op.getResult(0);
 }
 
+// Emit a cir.atomic.fence for the fence-family builtins. The memory-order
+// argument (arg 0) must currently be a compile-time constant; a variable
+// ordering is NYI. Returns a null mlir::Value since fences produce no result.
+static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf,
+                                        const CallExpr *expr,
+                                        cir::MemScopeKind syncScope) {
+  auto &builder = cgf.getBuilder();
+  mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));
+
+  auto constOrdering =
+      mlir::dyn_cast<cir::ConstantOp>(orderingVal.getDefiningOp());
+  if (!constOrdering)
+    llvm_unreachable("NYI: variable ordering not supported");
+
+  auto constOrderingAttr =
+      mlir::dyn_cast<cir::IntAttr>(constOrdering.getValue());
+  if (constOrderingAttr) {
+    cir::MemOrder ordering =
+        static_cast<cir::MemOrder>(constOrderingAttr.getUInt());
+
+    builder.create<cir::AtomicFence>(cgf.getLoc(expr->getSourceRange()),
+                                     syncScope, ordering);
+  }
+
+  return mlir::Value();
+}
+
 static bool typeRequiresBuiltinLaunderImp(const ASTContext &astContext,
                                           QualType ty,
                                           llvm::SmallPtrSetImpl<const Type *> &seen) {
@@ -1863,10 +1893,14 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     llvm_unreachable("BI__atomic_clear NYI");
 
   case Builtin::BI__atomic_thread_fence:
+    return RValue::get(
+        makeAtomicFenceValue(*this, E, cir::MemScopeKind::MemScope_System));
   case Builtin::BI__atomic_signal_fence:
+    return RValue::get(makeAtomicFenceValue(
+        *this, E, cir::MemScopeKind::MemScope_SingleThread));
   case Builtin::BI__c11_atomic_thread_fence:
   case Builtin::BI__c11_atomic_signal_fence:
-    llvm_unreachable("BI__atomic_thread_fence like NYI");
+    llvm_unreachable("BI__c11_atomic_thread_fence like NYI");
 
   case Builtin::BI__builtin_signbit:
   case Builtin::BI__builtin_signbitf:
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 5d8cf071927b..b8386a038e6d 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -18,6 +18,8 @@
 #include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
 #include "mlir/Dialect/DLTI/DLTI.h"
 #include "mlir/Dialect/Func/IR/FuncOps.h"
+#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
+#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/Transforms/Passes.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
@@ -41,6 +43,8 @@
 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
 #include "mlir/Target/LLVMIR/Export.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
+#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
 #include "clang/CIR/Dialect/Passes.h"
 #include "clang/CIR/LoweringHelpers.h"
 #include "clang/CIR/MissingFeatures.h"
@@ -3198,6 +3202,11 @@ mlir::LLVM::AtomicOrdering getLLVMAtomicOrder(cir::MemOrder memo) {
   llvm_unreachable("shouldn't get here");
 }
 
+// Map the CIR sync-scope enum to LLVM's textual syncscope: single_thread
+// lowers to syncscope("singlethread"); system is LLVM's default (empty name).
+llvm::StringRef getLLVMSyncScope(cir::MemScopeKind syncScope) {
+  return syncScope == cir::MemScopeKind::MemScope_SingleThread ? "singlethread"
+                                                               : "";
+}
+
 mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite(
     cir::AtomicCmpXchg op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -3366,6 +3375,18 @@ mlir::LogicalResult CIRToLLVMAtomicFetchLowering::matchAndRewrite(
   return mlir::success();
 }
 
+mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
+    cir::AtomicFence op, OpAdaptor adaptor,
+    mlir::ConversionPatternRewriter &rewriter) const {
+  auto llvmOrder = getLLVMAtomicOrder(adaptor.getOrdering());
+  auto llvmSyncScope = getLLVMSyncScope(adaptor.getSyncScope());
+
+  rewriter.replaceOpWithNewOp<mlir::LLVM::FenceOp>(op, llvmOrder,
+                                                   llvmSyncScope);
+
+  return mlir::success();
+}
+
 mlir::LogicalResult CIRToLLVMByteswapOpLowering::matchAndRewrite(
     cir::ByteswapOp op, OpAdaptor adaptor,
     mlir::ConversionPatternRewriter &rewriter) const {
@@ -4127,6 +4148,7 @@ void populateCIRToLLVMConversionPatterns(
       CIRToLLVMAtomicCmpXchgLowering,
       CIRToLLVMAtomicFetchLowering,
       CIRToLLVMAtomicXchgLowering,
+      CIRToLLVMAtomicFenceLowering,
       CIRToLLVMBaseClassAddrOpLowering,
       CIRToLLVMBinOpLowering,
       CIRToLLVMBinOpOverflowOpLowering,
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
index 629d148427fd..7bb14c432d07 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
@@ -16,6 +16,7 @@
 #include "mlir/IR/MLIRContext.h"
 #include "mlir/Interfaces/DataLayoutInterfaces.h"
 #include "mlir/Transforms/DialectConversion.h"
+#include "clang/CIR/Dialect/IR/CIRDialect.h"
 
 namespace cir {
 namespace direct {
@@ -821,6 +822,16 @@ class CIRToLLVMAtomicFetchLowering
                   mlir::ConversionPatternRewriter &) const override;
 };
 
+class CIRToLLVMAtomicFenceLowering
+    : public mlir::OpConversionPattern<cir::AtomicFence> {
+public:
+  using mlir::OpConversionPattern<cir::AtomicFence>::OpConversionPattern;
+
+  mlir::LogicalResult
+  matchAndRewrite(cir::AtomicFence op, OpAdaptor,
+                  mlir::ConversionPatternRewriter &) const override;
+};
+
 class CIRToLLVMByteswapOpLowering
     : public mlir::OpConversionPattern<cir::ByteswapOp> {
 public:
diff --git a/clang/test/CIR/CodeGen/atomic-thread-fence.c b/clang/test/CIR/CodeGen/atomic-thread-fence.c
new file mode 100644
index 000000000000..4c71c3c83966
--- /dev/null
+++ b/clang/test/CIR/CodeGen/atomic-thread-fence.c
@@ -0,0 +1,133 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+
+struct Data {
+  int value;
+  void *ptr;
+};
+
+typedef struct Data *DataPtr;
+
+void applyThreadFence() {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+}
+
+// CIR-LABEL: @applyThreadFence
+// CIR: cir.atomic.fence system seq_cst
+// CIR: cir.return
+
+// LLVM-LABEL: @applyThreadFence
+// LLVM: fence seq_cst
+// LLVM: ret void
+
+void applySignalFence() {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @applySignalFence
+// CIR: cir.atomic.fence single_thread seq_cst
+// CIR: cir.return
+
+// LLVM-LABEL: @applySignalFence
+// LLVM: fence syncscope("singlethread") seq_cst
+// LLVM: ret void
+
+void modifyWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  d->value = 42;
+}
+// CIR-LABEL: @modifyWithThreadFence
+// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR: cir.atomic.fence system seq_cst
+// CIR: %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
+// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
+// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!ty_Data> -> !cir.ptr<!s32i>
+// CIR: cir.store %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.return
+
+// LLVM-LABEL: @modifyWithThreadFence
+// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM: fence seq_cst
+// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+// LLVM: store i32 42, ptr %[[DATA_VALUE]], align 4
+// LLVM: ret void
+
+void modifyWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  d->value = 24;
+}
+// CIR-LABEL: @modifyWithSignalFence
+// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR: cir.atomic.fence single_thread seq_cst
+// CIR: %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
+// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
+// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!ty_Data> -> !cir.ptr<!s32i>
+// CIR: cir.store %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
+// CIR: cir.return
+
+// LLVM-LABEL: @modifyWithSignalFence
+// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM: fence syncscope("singlethread") seq_cst
+// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
+// LLVM: store i32 24, ptr %[[DATA_VALUE]], align 4
+// LLVM: ret void
+
+void loadWithThreadFence(DataPtr d) {
+  __atomic_thread_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @loadWithThreadFence
+// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
+// CIR: cir.atomic.fence system seq_cst
+// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
+// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!ty_Data> -> !cir.ptr<!cir.ptr<!void>>
+// CIR: %[[CASTED_DATA_VALUE:.*]] = cir.cast(bitcast, %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
+// CIR: %[[ATOMIC_LOAD:.*]] = cir.load atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
+// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast(bitcast, %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
+// CIR: cir.store %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[ATOMIC_LOAD_PTR:.*]] = cir.load %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: cir.return
+
+// LLVM-LABEL: @loadWithThreadFence
+// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
+// LLVM: fence seq_cst
+// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+// LLVM: ret void
+
+void loadWithSignalFence(DataPtr d) {
+  __atomic_signal_fence(__ATOMIC_SEQ_CST);
+  __atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
+}
+// CIR-LABEL: @loadWithSignalFence
+// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
+// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
+// CIR: cir.atomic.fence single_thread seq_cst
+// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
+// CIR: %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!ty_Data> -> !cir.ptr<!cir.ptr<!void>>
+// CIR: %[[CASTED_DATA_PTR:.*]] = cir.cast(bitcast, %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
+// CIR: %[[ATOMIC_LOAD:.*]] = cir.load atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
+// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast(bitcast, %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
+// CIR: cir.store %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
+// CIR: %[[LOAD_ATOMIC_TEMP:.*]] = cir.load %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
+// CIR: cir.return
+
+// LLVM-LABEL: @loadWithSignalFence
+// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
+// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
+// LLVM: fence syncscope("singlethread") seq_cst
+// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
+// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
+// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
+// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
+// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
+// LLVM: ret void