Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CIR][CIRGen] Support for builtin __atomic_thread_fence #1287

Merged
merged 2 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -5411,6 +5411,49 @@ def AtomicCmpXchg : CIR_Op<"atomic.cmp_xchg",
let hasVerifier = 0;
}

// Enum cases for the synchronization scope of an atomic fence. Each case
// gives the C++ enumerator name, its integer value and the keyword used in
// the textual IR.
def MemScope_SingleThread : I32EnumAttrCase<"MemScope_SingleThread",
0, "single_thread">;
def MemScope_System : I32EnumAttrCase<"MemScope_System",
1, "system">;

// 32-bit enum attribute grouping the scope cases above; the generated C++
// enum is placed in the `::cir` namespace.
def MemScopeKind : I32EnumAttr<
"MemScopeKind",
"Memory Scope Enumeration",
[MemScope_SingleThread, MemScope_System]> {
let cppNamespace = "::cir";
}

// `cir.atomic.fence`: a result-less operation carrying a synchronization
// scope and a memory ordering, both as attributes.
//
// NOTE(review): the description below lists the `__c11_atomic_*_fence`
// builtins, but the CIRGen builtin switch introduced alongside this op still
// reports those two as NYI; only `__atomic_thread_fence` and
// `__atomic_signal_fence` currently create this op.
def AtomicFence : CIR_Op<"atomic.fence"> {
let summary = "Atomic thread fence";
let description = [{
C/C++ Atomic thread fence synchronization primitive. Implements the builtin
`__atomic_thread_fence` which enforces memory ordering constraints across
threads within the specified synchronization scope.

This handles all variations including:
- `__atomic_thread_fence`
- `__atomic_signal_fence`
- `__c11_atomic_thread_fence`
- `__c11_atomic_signal_fence`

Example:
```mlir
cir.atomic.fence system seq_cst
cir.atomic.fence single_thread seq_cst
```

}];
// The op defines no SSA results.
let results = (outs);
// Scope first, ordering second: the same order the assembly format prints.
let arguments = (ins Arg<MemScopeKind, "sync scope">:$sync_scope,
Arg<MemOrder, "memory order">:$ordering);

// Textual form: `cir.atomic.fence <sync_scope> <ordering> {attrs}`.
let assemblyFormat = [{
$sync_scope $ordering attr-dict
}];

// No op-specific verifier is requested.
let hasVerifier = 0;
}

def SignBitOp : CIR_Op<"signbit", [Pure]> {
let summary = "Checks the sign of a floating-point number";
let description = [{
Expand Down
36 changes: 35 additions & 1 deletion clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,11 @@
#include "CIRGenCstEmitter.h"
#include "CIRGenFunction.h"
#include "CIRGenModule.h"
#include "CIRGenValue.h"
#include "TargetInfo.h"
#include "clang/AST/Expr.h"
#include "clang/CIR/Dialect/IR/CIRAttrs.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include "clang/CIR/MissingFeatures.h"

// TODO(cir): we shouldn't need this but we currently reuse intrinsic IDs for
Expand All @@ -30,7 +34,9 @@
#include "clang/Frontend/FrontendDiagnostic.h"

#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/Value.h"
#include "mlir/Support/LLVM.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "llvm/Support/ErrorHandling.h"

Expand Down Expand Up @@ -333,6 +339,30 @@ static mlir::Value MakeAtomicCmpXchgValue(CIRGenFunction &cgf,
return returnBool ? op.getResult(1) : op.getResult(0);
}

/// Emit a `cir.atomic.fence` for a fence builtin call.
///
/// \param cgf       Code generator for the function being emitted; supplies
///                  the builder and source-location mapping.
/// \param expr      The builtin call expression. Argument 0 is the memory
///                  order and must currently fold to an integer constant.
/// \param syncScope Synchronization scope recorded on the emitted fence.
/// \returns A null `mlir::Value`: the fence op defines no result.
///
/// Non-constant orderings are not implemented and abort through
/// `llvm_unreachable`.
static mlir::Value makeAtomicFenceValue(CIRGenFunction &cgf,
                                        const CallExpr *expr,
                                        cir::MemScopeKind syncScope) {
  auto &builder = cgf.getBuilder();
  mlir::Value orderingVal = cgf.emitScalarExpr(expr->getArg(0));

  // A value without a defining op (e.g. a block argument) yields a null
  // Operation*. The templated accessor tolerates null, whereas a plain
  // `dyn_cast` on a null pointer asserts before the NYI diagnostic below can
  // be reached.
  auto constOrdering = orderingVal.getDefiningOp<cir::ConstantOp>();
  if (!constOrdering)
    llvm_unreachable("NYI: variable ordering not supported");

  // Fail loudly instead of silently dropping the fence when the constant is
  // not an integer attribute: a missing fence would be a miscompile.
  auto constOrderingAttr =
      mlir::dyn_cast<cir::IntAttr>(constOrdering.getValue());
  if (!constOrderingAttr)
    llvm_unreachable("NYI: non-integer ordering not supported");

  cir::MemOrder ordering =
      static_cast<cir::MemOrder>(constOrderingAttr.getUInt());
  builder.create<cir::AtomicFence>(cgf.getLoc(expr->getSourceRange()),
                                   syncScope, ordering);

  return mlir::Value();
}

static bool
typeRequiresBuiltinLaunderImp(const ASTContext &astContext, QualType ty,
llvm::SmallPtrSetImpl<const Decl *> &seen) {
Expand Down Expand Up @@ -1863,10 +1893,14 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm_unreachable("BI__atomic_clear NYI");

case Builtin::BI__atomic_thread_fence:
return RValue::get(
makeAtomicFenceValue(*this, E, cir::MemScopeKind::MemScope_System));
case Builtin::BI__atomic_signal_fence:
return RValue::get(makeAtomicFenceValue(
*this, E, cir::MemScopeKind::MemScope_SingleThread));
case Builtin::BI__c11_atomic_thread_fence:
case Builtin::BI__c11_atomic_signal_fence:
llvm_unreachable("BI__atomic_thread_fence like NYI");
llvm_unreachable("BI__c11_atomic_thread_fence like NYI");

case Builtin::BI__builtin_signbit:
case Builtin::BI__builtin_signbitf:
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include "mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Dialect/LLVMIR/LLVMAttrs.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/Transforms/Passes.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
Expand All @@ -41,6 +43,8 @@
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"
#include "clang/CIR/Dialect/IR/CIROpsEnums.h"
#include "clang/CIR/Dialect/Passes.h"
#include "clang/CIR/LoweringHelpers.h"
#include "clang/CIR/MissingFeatures.h"
Expand Down Expand Up @@ -3198,6 +3202,11 @@ mlir::LLVM::AtomicOrdering getLLVMAtomicOrder(cir::MemOrder memo) {
llvm_unreachable("shouldn't get here");
}

/// Map a CIR memory scope to the sync-scope name handed to the LLVM dialect:
/// "singlethread" for `MemScope_SingleThread`, the empty string for any other
/// scope.
llvm::StringRef getLLVMSyncScope(cir::MemScopeKind syncScope) {
  if (syncScope == cir::MemScopeKind::MemScope_SingleThread)
    return "singlethread";
  return "";
}

mlir::LogicalResult CIRToLLVMAtomicCmpXchgLowering::matchAndRewrite(
cir::AtomicCmpXchg op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
Expand Down Expand Up @@ -3366,6 +3375,18 @@ mlir::LogicalResult CIRToLLVMAtomicFetchLowering::matchAndRewrite(
return mlir::success();
}

/// Replace a `cir.atomic.fence` with an `llvm.fence` whose ordering and
/// sync-scope are translated from the CIR attributes. Always succeeds.
mlir::LogicalResult CIRToLLVMAtomicFenceLowering::matchAndRewrite(
    cir::AtomicFence op, OpAdaptor adaptor,
    mlir::ConversionPatternRewriter &rewriter) const {
  // Translate both attributes up front, ordering first and then scope.
  mlir::LLVM::AtomicOrdering fenceOrdering =
      getLLVMAtomicOrder(adaptor.getOrdering());
  llvm::StringRef fenceScope = getLLVMSyncScope(adaptor.getSyncScope());

  rewriter.replaceOpWithNewOp<mlir::LLVM::FenceOp>(op, fenceOrdering,
                                                   fenceScope);
  return mlir::success();
}

mlir::LogicalResult CIRToLLVMByteswapOpLowering::matchAndRewrite(
cir::ByteswapOp op, OpAdaptor adaptor,
mlir::ConversionPatternRewriter &rewriter) const {
Expand Down Expand Up @@ -4127,6 +4148,7 @@ void populateCIRToLLVMConversionPatterns(
CIRToLLVMAtomicCmpXchgLowering,
CIRToLLVMAtomicFetchLowering,
CIRToLLVMAtomicXchgLowering,
CIRToLLVMAtomicFenceLowering,
CIRToLLVMBaseClassAddrOpLowering,
CIRToLLVMBinOpLowering,
CIRToLLVMBinOpOverflowOpLowering,
Expand Down
11 changes: 11 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "mlir/IR/MLIRContext.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"
#include "mlir/Transforms/DialectConversion.h"
#include "clang/CIR/Dialect/IR/CIRDialect.h"

namespace cir {
namespace direct {
Expand Down Expand Up @@ -821,6 +822,16 @@ class CIRToLLVMAtomicFetchLowering
mlir::ConversionPatternRewriter &) const override;
};

/// Conversion pattern that rewrites `cir::AtomicFence` operations during the
/// direct CIR-to-LLVM lowering. The rewrite itself is defined out of line.
class CIRToLLVMAtomicFenceLowering
: public mlir::OpConversionPattern<cir::AtomicFence> {
public:
// Inherit the base pattern's constructors.
using mlir::OpConversionPattern<cir::AtomicFence>::OpConversionPattern;

mlir::LogicalResult
matchAndRewrite(cir::AtomicFence op, OpAdaptor,
mlir::ConversionPatternRewriter &) const override;
};

class CIRToLLVMByteswapOpLowering
: public mlir::OpConversionPattern<cir::ByteswapOp> {
public:
Expand Down
133 changes: 133 additions & 0 deletions clang/test/CIR/CodeGen/atomic-thread-fence.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s


// Aggregate shared by the fence tests: `value` is stored to by the modify*
// tests and `ptr` is read with an atomic load by the load* tests.
struct Data {
int value;
void *ptr;
};

// Pointer alias used as the parameter type of the tests below.
typedef struct Data *DataPtr;

// Bare seq_cst thread fence with no surrounding memory accesses; the checks
// that follow expect a `system`-scope fence and a plain `fence seq_cst`.
void applyThreadFence() {
__atomic_thread_fence(__ATOMIC_SEQ_CST);
}

// CIR-LABEL: @applyThreadFence
// CIR: cir.atomic.fence system seq_cst
// CIR: cir.return

// LLVM-LABEL: @applyThreadFence
// LLVM: fence seq_cst
// LLVM: ret void

// Bare seq_cst signal fence; the checks that follow expect a `single_thread`
// scope fence and a `syncscope("singlethread")` fence in the LLVM IR.
void applySignalFence() {
__atomic_signal_fence(__ATOMIC_SEQ_CST);
}
// CIR-LABEL: @applySignalFence
// CIR: cir.atomic.fence single_thread seq_cst
// CIR: cir.return

// LLVM-LABEL: @applySignalFence
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: ret void

// Thread fence followed by a plain store through `d`; the checks that follow
// expect the fence to be emitted before the store of 42.
void modifyWithThreadFence(DataPtr d) {
__atomic_thread_fence(__ATOMIC_SEQ_CST);
d->value = 42;
}
// CIR-LABEL: @modifyWithThreadFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: cir.atomic.fence system seq_cst
// CIR: %[[VAL_42:.*]] = cir.const #cir.int<42> : !s32i
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!ty_Data> -> !cir.ptr<!s32i>
// CIR: cir.store %[[VAL_42]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
// CIR: cir.return

// LLVM-LABEL: @modifyWithThreadFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// LLVM: store i32 42, ptr %[[DATA_VALUE]], align 4
// LLVM: ret void

// Signal fence followed by a plain store through `d`; the checks that follow
// expect the single-thread fence to be emitted before the store of 24.
void modifyWithSignalFence(DataPtr d) {
__atomic_signal_fence(__ATOMIC_SEQ_CST);
d->value = 24;
}
// CIR-LABEL: @modifyWithSignalFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: cir.atomic.fence single_thread seq_cst
// CIR: %[[VAL_24:.*]] = cir.const #cir.int<24> : !s32i
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][0] {name = "value"} : !cir.ptr<!ty_Data> -> !cir.ptr<!s32i>
// CIR: cir.store %[[VAL_24]], %[[DATA_VALUE]] : !s32i, !cir.ptr<!s32i>
// CIR: cir.return

// LLVM-LABEL: @modifyWithSignalFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 0
// LLVM: store i32 24, ptr %[[DATA_VALUE]], align 4
// LLVM: ret void

// Thread fence followed by a seq_cst atomic load of `d->ptr`. The loaded
// value is discarded; only the emitted instruction sequence is checked.
void loadWithThreadFence(DataPtr d) {
__atomic_thread_fence(__ATOMIC_SEQ_CST);
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
}
// CIR-LABEL: @loadWithThreadFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
// CIR: cir.atomic.fence system seq_cst
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
// CIR: %[[DATA_VALUE:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!ty_Data> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[CASTED_DATA_VALUE:.*]] = cir.cast(bitcast, %[[DATA_VALUE]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load atomic(seq_cst) %[[CASTED_DATA_VALUE]] : !cir.ptr<!u64i>, !u64i
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast(bitcast, %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
// CIR: cir.store %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
// CIR: %[[ATOMIC_LOAD_PTR:.*]] = cir.load %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
// CIR: cir.return

// LLVM-LABEL: @loadWithThreadFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// LLVM: ret void

// Signal fence followed by a seq_cst atomic load of `d->ptr`. The loaded
// value is discarded; only the emitted instruction sequence is checked.
void loadWithSignalFence(DataPtr d) {
__atomic_signal_fence(__ATOMIC_SEQ_CST);
__atomic_load_n(&d->ptr, __ATOMIC_SEQ_CST);
}
// CIR-LABEL: @loadWithSignalFence
// CIR: %[[DATA:.*]] = cir.alloca !cir.ptr<!ty_Data>, !cir.ptr<!cir.ptr<!ty_Data>>, ["d", init] {alignment = 8 : i64}
// CIR: %[[ATOMIC_TEMP:.*]] = cir.alloca !cir.ptr<!void>, !cir.ptr<!cir.ptr<!void>>, ["atomic-temp"] {alignment = 8 : i64}
// CIR: cir.atomic.fence single_thread seq_cst
// CIR: %[[LOAD_DATA:.*]] = cir.load %[[DATA]] : !cir.ptr<!cir.ptr<!ty_Data>>, !cir.ptr<!ty_Data>
// CIR: %[[DATA_PTR:.*]] = cir.get_member %[[LOAD_DATA]][1] {name = "ptr"} : !cir.ptr<!ty_Data> -> !cir.ptr<!cir.ptr<!void>>
// CIR: %[[CASTED_DATA_PTR:.*]] = cir.cast(bitcast, %[[DATA_PTR]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
// CIR: %[[ATOMIC_LOAD:.*]] = cir.load atomic(seq_cst) %[[CASTED_DATA_PTR]] : !cir.ptr<!u64i>, !u64i
// CIR: %[[CASTED_ATOMIC_TEMP:.*]] = cir.cast(bitcast, %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>), !cir.ptr<!u64i>
// CIR: cir.store %[[ATOMIC_LOAD]], %[[CASTED_ATOMIC_TEMP]] : !u64i, !cir.ptr<!u64i>
// CIR: %[[LOAD_ATOMIC_TEMP:.*]] = cir.load %[[ATOMIC_TEMP]] : !cir.ptr<!cir.ptr<!void>>, !cir.ptr<!void>
// CIR: cir.return

// LLVM-LABEL: @loadWithSignalFence
// LLVM: %[[DATA:.*]] = alloca ptr, i64 1, align 8
// LLVM: %[[DATA_TEMP:.*]] = alloca ptr, i64 1, align 8
// LLVM: fence syncscope("singlethread") seq_cst
// LLVM: %[[DATA_PTR:.*]] = load ptr, ptr %[[DATA]], align 8
// LLVM: %[[DATA_VALUE:.*]] = getelementptr %struct.Data, ptr %[[DATA_PTR]], i32 0, i32 1
// LLVM: %[[ATOMIC_LOAD:.*]] = load atomic i64, ptr %[[DATA_VALUE]] seq_cst, align 8
// LLVM: store i64 %[[ATOMIC_LOAD]], ptr %[[DATA_TEMP]], align 8
// LLVM: %[[DATA_TEMP_LOAD:.*]] = load ptr, ptr %[[DATA_TEMP]], align 8
// LLVM: ret void