From 13688ae9ee498346fee08f5b0e551ec8b569a6dc Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Fri, 31 Jan 2025 09:59:36 -0500 Subject: [PATCH] [CSI] Fix instrumentation around sync and sync.unwind instructions. Fix promotion of calls to invokes when potentially-throwing calls are inside tasks with unwind destinations. --- clang/test/Cilk/cilk-mixed-unwind-codegen.cpp | 37 +++ .../ComprehensiveStaticInstrumentation.cpp | 52 ++- llvm/lib/Transforms/Utils/TapirUtils.cpp | 59 ++-- .../csi-instrument-sync-with-unwind.ll | 69 ++++ .../csi-setup-mixed-task-lpad.ll | 185 +++++++++++ .../csi-sync-unwind-loopexit-multiple.ll | 313 ++++++++++++++++++ .../CilkSanitizer/csi-sync-unwind-loopexit.ll | 235 +++++++++++++ .../split-unreachable-predecessors.ll | 3 - .../mixed-predecessors-of-unreachable.ll | 91 +++++ 9 files changed, 995 insertions(+), 49 deletions(-) create mode 100644 clang/test/Cilk/cilk-mixed-unwind-codegen.cpp create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll create mode 100644 llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll diff --git a/clang/test/Cilk/cilk-mixed-unwind-codegen.cpp b/clang/test/Cilk/cilk-mixed-unwind-codegen.cpp new file mode 100644 index 000000000000..0dddd92d1419 --- /dev/null +++ b/clang/test/Cilk/cilk-mixed-unwind-codegen.cpp @@ -0,0 +1,37 @@ +// Check that Clang may generate function calls that can throw with or without +// a landingpad in the same Cilk scope. 
+// +// RUN: %clang_cc1 -fopencilk -fcxx-exceptions -fexceptions -ftapir=none -triple x86_64-unknown-linux-gnu -std=c++11 -emit-llvm %s -o - | FileCheck %s +// expected-no-diagnostics + +int bar(int n); +void foo(int n) { + cilk_for (int i = 0; i < n; ++i) { + int w = bar(i); + throw bar(w); + } +} + +// CHECK-LABEL: define {{.*}}void @_Z3fooi(i32 {{.*}}%n) + +// Check for detach with an unwind destination +// CHECK: detach within %[[SYNCREG:.+]], label %[[PFOR_BODY_ENTRY:.+]], label %[[PFOR_INC:.+]] unwind label %[[DETACH_LPAD:.+]] + +// CHECK: [[PFOR_BODY_ENTRY]]: + +// Check for call to function bar that might throw. +// CHECK: call {{.*}}i32 @_Z3bari(i32 + +// Check for invoke of function bar +// CHECK: invoke noundef i32 @_Z3bari(i32 +// CHECK-NEXT: to label %[[INVOKE_CONT:.+]] unwind label %[[TASK_LPAD:.+]] + +// CHECK: [[INVOKE_CONT]]: +// CHECK: call void @__cxa_throw(ptr +// CHECK-NEXT: unreachable + +// CHECK: [[TASK_LPAD]]: +// CHECK-NEXT: landingpad +// CHECK-NEXT: cleanup +// CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %[[SYNCREG]], { ptr, i32 } %{{.*}}) +// CHECK-NEXT: to label %[[UNREACHABLE:.+]] unwind label %[[DETACH_LPAD]] diff --git a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp index a36f011a3968..7ac26a7a5572 100644 --- a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp @@ -24,12 +24,14 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/EHPersonalities.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" 
#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" @@ -1138,10 +1140,19 @@ bool CSIImpl::instrumentMemIntrinsic(Instruction *I) { } void CSIImpl::instrumentBasicBlock(BasicBlock &BB, const TaskInfo &TI) { - IRBuilder<> IRB(&*BB.getFirstInsertionPt()); + Instruction *InsertPt = &*BB.getFirstInsertionPt(); bool IsEntry = isEntryBlock(BB, TI); if (IsEntry) - IRB.SetInsertPoint(getEntryBBInsertPt(BB)); + InsertPt = getEntryBBInsertPt(BB); + // Skip any sync.unwind intrinsics, which need to remain paired with + // corresponding syncs. + if (isSyncUnwind(InsertPt)) + InsertPt = InsertPt->getNextNode(); + // Skip any taskframe.end intrinsics, to keep the basic-block instrumentation + // in the same basic block. + if (isTapirIntrinsic(Intrinsic::taskframe_end, InsertPt)) + InsertPt = InsertPt->getNextNode(); + IRBuilder<> IRB(InsertPt); uint64_t LocalId = BasicBlockFED.add(BB); uint64_t BBSizeId = BBSize.add(BB, GetTTI ? &(*GetTTI)(*BB.getParent()) : nullptr); @@ -1235,8 +1246,24 @@ void CSIImpl::instrumentLoop(Loop &L, TaskInfo &TI, ScalarEvolution *SE) { insertHookCall(&*IRB.GetInsertPoint(), CsiLoopBodyEntry, {LoopCsiId, LoopPropVal}); + SmallPtrSet ExitingBlocksVisited; // Insert hooks at the ends of the exiting blocks. - for (BasicBlock *BB : ExitingBlocks) { + while (!ExitingBlocks.empty()) { + BasicBlock *BB = ExitingBlocks.pop_back_val(); + if (!ExitingBlocksVisited.insert(BB).second) + continue; + if (isSyncUnwind(BB->getTerminator())) { + // Insert the loopbody_exit hook before the sync instruction, rather than + // the sync.unwind. + // TODO: I don't think there's anything preventing a sync.unwind from + // having multiple sync-instruction predecessors, so all such predecessors + // need to be addressed. This logic should become simpler if sync itself + // is modified to have an unwind destination. 
+ for (BasicBlock *Pred : predecessors(BB)) + ExitingBlocks.push_back(Pred); + continue; + } + // Record properties of this loop exit CsiLoopExitProperty LoopExitProp; LoopExitProp.setIsLatch(L.isLoopLatch(BB)); @@ -1806,13 +1833,16 @@ CallInst *CSIImpl::insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB, ArrayRef HookArgs, ArrayRef DefaultArgs) { assert(HookFunction && "No hook function given."); + Instruction *InsertPt = &*Succ->getFirstInsertionPt(); + if (isSyncUnwind(InsertPt)) + InsertPt = InsertPt->getNextNode(); + // If this successor block has a unique predecessor, just insert the hook call // as normal. if (Succ->getUniquePredecessor()) { assert(Succ->getUniquePredecessor() == BB && "BB is not unique predecessor of successor block"); - return insertHookCall(&*Succ->getFirstInsertionPt(), HookFunction, - HookArgs); + return insertHookCall(InsertPt, HookFunction, HookArgs); } if (updateArgPHIs(Succ, BB, HookFunction, HookArgs, DefaultArgs)) @@ -1823,7 +1853,7 @@ CallInst *CSIImpl::insertHookCallInSuccessorBB(BasicBlock *Succ, BasicBlock *BB, for (PHINode *ArgPHI : ArgPHIs[Key]) SuccessorHookArgs.push_back(ArgPHI); - IRBuilder<> IRB(&*Succ->getFirstInsertionPt()); + IRBuilder<> IRB(InsertPt); // Insert the hook call, using the PHI as the CSI ID. CallInst *Call = IRB.CreateCall(HookFunction, SuccessorHookArgs); setInstrumentationDebugLoc(*Succ, (Instruction *)Call); @@ -2747,6 +2777,11 @@ void CSIImpl::instrumentFunction(Function &F) { for (BasicBlock *BB : BasicBlocks) instrumentBasicBlock(*BB, TI); + if (Options.InstrumentLoops) + // Recursively instrument all loops + for (Loop *L : LI) + instrumentLoop(*L, TI, SE); + // Instrument Tapir constructs. 
if (Options.InstrumentTapir) { if (Config->DoesFunctionRequireInstrumentationForPoint( @@ -2768,11 +2803,6 @@ void CSIImpl::instrumentFunction(Function &F) { for (Instruction *I : Allocas) instrumentAlloca(I, TI); - if (Options.InstrumentLoops) - // Recursively instrument all loops - for (Loop *L : LI) - instrumentLoop(*L, TI, SE); - // Do this work in a separate loop after copying the iterators so that we // aren't modifying the list as we're iterating. if (Options.InstrumentMemoryAccesses) diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp index fb2c1becfa8b..f24ae286fcf3 100644 --- a/llvm/lib/Transforms/Utils/TapirUtils.cpp +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/TapirUtils.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" @@ -85,16 +86,13 @@ bool llvm::isSkippableTapirIntrinsic(const Instruction *I) { /// Returns true if the given basic block \p B is a placeholder successor of a /// taskframe.resume or detached.rethrow. bool llvm::isTapirPlaceholderSuccessor(const BasicBlock *B) { - for (const BasicBlock *Pred : predecessors(B)) { + return llvm::any_of(predecessors(B), [&](const BasicBlock *Pred) { if (!isDetachedRethrow(Pred->getTerminator()) && !isTaskFrameResume(Pred->getTerminator())) return false; - const InvokeInst *II = dyn_cast<InvokeInst>(Pred->getTerminator()); - if (B != II->getNormalDest()) - return false; - } - return true; + return B == II->getNormalDest(); + }); } /// Returns a taskframe.resume that uses the given taskframe, or nullptr if no @@ -2166,37 +2164,28 @@ static void promoteCallsInTasksHelper( // spawned task recursively. 
if (DetachInst *DI = dyn_cast<DetachInst>(BB->getTerminator())) { Processed.insert(BB); - if (!DI->hasUnwindDest()) { - // Create an unwind edge for the subtask, which is terminated with a - // detached-rethrow. - BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge( - Intrinsic::detached_rethrow, DI->getSyncRegion(), UnwindEdge, - Unreachable, DI); - // Recursively check all blocks in the detached task. - promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, - Unreachable, CurrentTaskFrame, &Worklist, - Processed, IgnoreFunctionCheck); - // If the new unwind edge is not used, remove it. - if (pred_empty(SubTaskUnwindEdge)) - SubTaskUnwindEdge->eraseFromParent(); - else - DetachesToReplace.push_back(DI); - } else { - // Because this detach has an unwind destination, any calls in the - // spawned task that may throw should already be invokes. Hence there - // is no need to promote calls in this task. - if (IgnoreFunctionCheck) { - // This recursive call should only apply IgnoreFunctionCheck to callsites. - promoteCallsInTasksHelper(DI->getDetached(), DI->getUnwindDest(), - Unreachable, CurrentTaskFrame, &Worklist, - Processed, IgnoreFunctionCheck); - } + // Create an unwind edge for the subtask, which is terminated with a + // detached-rethrow. + BasicBlock *SubTaskUnwindEdge = CreateSubTaskUnwindEdge( + Intrinsic::detached_rethrow, DI->getSyncRegion(), + DI->hasUnwindDest() ? DI->getUnwindDest() : UnwindEdge, Unreachable, + DI); + // Recursively check all blocks in the detached task. + promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, + Unreachable, CurrentTaskFrame, &Worklist, + Processed, IgnoreFunctionCheck); + + // If the new unwind edge is not used, remove it. + if (pred_empty(SubTaskUnwindEdge)) + SubTaskUnwindEdge->eraseFromParent(); + else if (!DI->hasUnwindDest()) + DetachesToReplace.push_back(DI); + + if (DI->hasUnwindDest() && Visited.insert(DI->getUnwindDest()).second) + // If the detach-unwind isn't dead, add it to the worklist. 
+ Worklist.push_back(DI->getUnwindDest()); - if (Visited.insert(DI->getUnwindDest()).second) - // If the detach-unwind isn't dead, add it to the worklist. - Worklist.push_back(DI->getUnwindDest()); - } // Add the continuation to the worklist. if (isTaskFrameResume(UnwindEdge->getTerminator()) && (CurrentTaskFrame == getTaskFrameUsed(DI->getDetached()))) { diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll new file mode 100644 index 000000000000..a3b2d5084faf --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-instrument-sync-with-unwind.ll @@ -0,0 +1,69 @@ +; Check that CSI does not insert instrumentation between a sync and its corresponding sync.unwind. +; +; RUN: opt < %s -passes="csi-setup,csi" -csi-instrument-basic-blocks=false -S | FileCheck %s --check-prefixes=CHECK +; RUN: opt < %s -passes="csi-setup,csi" -S | FileCheck %s --check-prefixes=CHECK,CHECK-BB +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #0 + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #1 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #0 + +define fastcc void @_Z28prove_sumcheck_cubic_batchedR16ProverTranscriptRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE14GoldilockField8FixedVecIP9DensePolyESD_RSB_SD_SD_SD_St4spanIKS9_Lm18446744073709551615EE.outline_pfor.cond522.ls2() personality ptr null { +pfor.cond522.preheader.ls2: + %syncreg529.ls2 = tail call token @llvm.syncregion.start() + br label %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 
+ +pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2: ; preds = %sync.continue578.ls2, %pfor.cond522.preheader.ls2 + %0 = tail call token @llvm.taskframe.create() + detach within %syncreg529.ls2, label %det.achd554.ls2, label %det.cont569.ls2 + +det.cont569.ls2: ; preds = %det.achd554.ls2, %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 + sync within %syncreg529.ls2, label %sync.continue578.ls2 + +sync.continue578.ls2: ; preds = %det.cont569.ls2 + tail call void @llvm.sync.unwind(token %syncreg529.ls2) #2 + tail call void @llvm.taskframe.end(token %0) + br i1 false, label %pfor.cond.cleanup599.ls2.tfend, label %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 + +det.achd554.ls2: ; preds = %pfor.body.entry525.tf.tf.tf.tf.tf.tf.tf.tf.ls2 + reattach within %syncreg529.ls2, label %det.cont569.ls2 + +pfor.cond.cleanup599.ls2.tfend: ; preds = %sync.continue578.ls2 + ret void +} + +; CHECK: define {{.*}}void @_Z28prove_sumcheck_cubic_batchedR16ProverTranscriptRKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE14GoldilockField8FixedVecIP9DensePolyESD_RSB_SD_SD_SD_St4spanIKS9_Lm18446744073709551615EE.outline_pfor.cond522.ls2() + +; CHECK: %syncreg529.ls2 = {{.*}}call token @llvm.syncregion.start() +; CHECK: %[[TF:.+]] = {{.*}}call token @llvm.taskframe.create() + +; CHECK: sync within %syncreg529.ls2, label %[[SYNC_CONT:.+]] + +; CHECK: [[SYNC_CONT]]: +; CHECK-NOT: call void @__csi_ +; CHECK-NEXT: void @llvm.sync.unwind(token %syncreg529.ls2 +; CHECK: call void @__csi_after_sync( +; CHECK-BB-NOT: @__csi_bb_ +; CHECK: call void @llvm.taskframe.end(token %[[TF]]) + +; CHECK-BB: call void @__csi_bb_entry( + +; CHECK: call void @__csi_loopbody_exit( + +; CHECK: reattach within %syncreg529.ls2 + +; uselistorder directives +uselistorder ptr null, { 1, 2, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { willreturn memory(argmem: readwrite) } +attributes #2 = { nounwind } diff --git 
a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll new file mode 100644 index 000000000000..72c4eb02e604 --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-setup-mixed-task-lpad.ll @@ -0,0 +1,185 @@ +; Check that csi-setup properly promotes calls to invokes when a call that might throw is inside a task with a detach-unwind. +; +; RUN: opt < %s -passes="csi-setup" -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@_ZTIi = external constant ptr + +; Function Attrs: mustprogress noinline optnone sanitize_cilk uwtable +define dso_local void @_Z3fooi(i32 noundef %n) #0 personality ptr @__gxx_personality_v0 { +entry: + %n.addr = alloca i32, align 4 + %syncreg = call token @llvm.syncregion.start() + %__init = alloca i32, align 4 + %__limit = alloca i32, align 4 + %__begin = alloca i32, align 4 + %__end = alloca i32, align 4 + %exn.slot4 = alloca ptr, align 8 + %ehselector.slot5 = alloca i32, align 4 + store i32 %n, ptr %n.addr, align 4 + store i32 0, ptr %__init, align 4 + %0 = load i32, ptr %n.addr, align 4 + store i32 %0, ptr %__limit, align 4 + %1 = load i32, ptr %__init, align 4 + %2 = load i32, ptr %__limit, align 4 + %cmp = icmp slt i32 %1, %2 + br i1 %cmp, label %pfor.ph, label %pfor.end + +pfor.ph: ; preds = %entry + store i32 0, ptr %__begin, align 4 + %3 = load i32, ptr %__limit, align 4 + %4 = load i32, ptr %__init, align 4 + %sub = sub nsw i32 %3, %4 + store i32 %sub, ptr %__end, align 4 + br label %pfor.cond + +pfor.cond: ; preds = %pfor.inc, %pfor.ph + br label %pfor.detach + +pfor.detach: ; preds = %pfor.cond + %5 = load i32, ptr %__init, align 4 + %6 = load i32, ptr %__begin, align 4 + %add = add nsw i32 %5, %6 + detach within %syncreg, label %pfor.body.entry, label %pfor.inc unwind label %lpad3 + +pfor.body.entry: ; preds = 
%pfor.detach + %i = alloca i32, align 4 + %w = alloca i32, align 4 + %exn.slot = alloca ptr, align 8 + %ehselector.slot = alloca i32, align 4 + store i32 %add, ptr %i, align 4 + br label %pfor.body + +pfor.body: ; preds = %pfor.body.entry + %7 = load i32, ptr %i, align 4 + %call = call noundef i32 @_Z3bari(i32 noundef %7) + store i32 %call, ptr %w, align 4 + %exception = call ptr @__cxa_allocate_exception(i64 4) #4 + %8 = load i32, ptr %w, align 4 + %call1 = invoke noundef i32 @_Z3bari(i32 noundef %8) + to label %invoke.cont unwind label %lpad + +; CHECK: pfor.body: +; CHECK-NEXT: %[[ARG1:.+]] = load i32, ptr %i +; CHECK-NOT: call {{.*}}i32 @_Z3bari(i32 noundef %{{.*}}) +; CHECK: invoke {{.*}}i32 @_Z3bari(i32 noundef %[[ARG1]]) +; CHECK-NEXT: to label %[[CALL_NOEXC:.+]] unwind label %[[CSI_SETUP_LPAD:.+]] + +; CHECK: [[CALL_NOEXC]]: +; CHECK: %[[ARG2:.+]] = load i32, ptr %w +; CHECK-NEXT: invoke noundef i32 @_Z3bari(i32 noundef %[[ARG2]]) +; CHECK-NEXT: to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %pfor.body + store i32 %call1, ptr %exception, align 16 + call void @__cxa_throw(ptr %exception, ptr @_ZTIi, ptr null) #5 + unreachable + +lpad: ; preds = %pfor.body + %9 = landingpad { ptr, i32 } + cleanup + %10 = extractvalue { ptr, i32 } %9, 0 + store ptr %10, ptr %exn.slot, align 8 + %11 = extractvalue { ptr, i32 } %9, 1 + store i32 %11, ptr %ehselector.slot, align 4 + call void @__cxa_free_exception(ptr %exception) #4 + %exn = load ptr, ptr %exn.slot, align 8 + %sel = load i32, ptr %ehselector.slot, align 4 + %lpad.val = insertvalue { ptr, i32 } undef, ptr %exn, 0 + %lpad.val2 = insertvalue { ptr, i32 } %lpad.val, i32 %sel, 1 + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } %lpad.val2) + to label %unreachable unwind label %lpad3 + +; CHECK: lpad: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK: invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, +; CHECK-NEXT: to label %unreachable unwind label 
%lpad3 + +pfor.preattach: ; No predecessors! + reattach within %syncreg, label %pfor.inc + +pfor.inc: ; preds = %pfor.preattach, %pfor.detach + %12 = load i32, ptr %__begin, align 4 + %inc = add nsw i32 %12, 1 + store i32 %inc, ptr %__begin, align 4 + %13 = load i32, ptr %__begin, align 4 + %14 = load i32, ptr %__end, align 4 + %cmp6 = icmp slt i32 %13, %14 + br i1 %cmp6, label %pfor.cond, label %pfor.cond.cleanup, !llvm.loop !6 + +pfor.cond.cleanup: ; preds = %pfor.inc + sync within %syncreg, label %sync.continue + +lpad3: ; preds = %lpad, %pfor.detach + %15 = landingpad { ptr, i32 } + cleanup + %16 = extractvalue { ptr, i32 } %15, 0 + store ptr %16, ptr %exn.slot4, align 8 + %17 = extractvalue { ptr, i32 } %15, 1 + store i32 %17, ptr %ehselector.slot5, align 4 + br label %eh.resume + +sync.continue: ; preds = %pfor.cond.cleanup + call void @llvm.sync.unwind(token %syncreg) + br label %pfor.end + +pfor.end: ; preds = %sync.continue, %entry + ret void + +eh.resume: ; preds = %lpad3 + %exn7 = load ptr, ptr %exn.slot4, align 8 + %sel8 = load i32, ptr %ehselector.slot5, align 4 + %lpad.val9 = insertvalue { ptr, i32 } poison, ptr %exn7, 0 + %lpad.val10 = insertvalue { ptr, i32 } %lpad.val9, i32 %sel8, 1 + resume { ptr, i32 } %lpad.val10 + +unreachable: ; preds = %lpad + unreachable + +; CHECK: [[CSI_SETUP_LPAD]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, +; CHECK-NEXT: to label %[[CSI_SETUP_UNREACHABLE:.+]] unwind label %lpad3 +} + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #1 + +declare noundef i32 @_Z3bari(i32 noundef) #2 + +declare ptr @__cxa_allocate_exception(i64) + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @__cxa_free_exception(ptr) + +declare void @__cxa_throw(ptr, ptr, ptr) + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #3 + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #3 + +attributes #0 = { mustprogress noinline optnone sanitize_cilk uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #3 = { willreturn memory(argmem: readwrite) } +attributes #4 = { nounwind } +attributes #5 = { noreturn } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{i32 7, !"frame-pointer", i32 2} +!5 = !{!"clang version 19.1.7 (git@github.com:OpenCilk/opencilk-project.git e929b19f1ca3426871e22a5843cc9e5725894576)"} +!6 = distinct !{!6, !7, !8} +!7 = !{!"llvm.loop.mustprogress"} +!8 = !{!"tapir.loop.spawn.strategy", i32 1} diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll new file mode 100644 index 000000000000..4f12326695c5 --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit-multiple.ll @@ -0,0 +1,313 @@ +; Check that CSI loop instrumentation instruments around sync-unwind loop exits properly. 
+; +; RUN: opt < %s -passes="csi-setup,csi" -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__clang_call_terminate = comdat any + +; Function Attrs: mustprogress uwtable +define dso_local void @_Z3fooi(i32 noundef %n) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { +entry: + %syncreg = tail call token @llvm.syncregion.start() + %cmp = icmp sgt i32 %n, 0 + br i1 %cmp, label %pfor.cond, label %try.cont + +pfor.cond: ; preds = %entry, %pfor.inc + %__begin.0 = phi i32 [ %inc, %pfor.inc ], [ 0, %entry ] + detach within %syncreg, label %pfor.body.entry, label %pfor.inc unwind label %lpad69.loopexit + +pfor.body.entry: ; preds = %pfor.cond + %w = alloca i32, align 4 + %syncreg2 = tail call token @llvm.syncregion.start() + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %w) + %and = and i32 %__begin.0, 1 + %tobool.not = icmp eq i32 %and, 0 + %0 = tail call token @llvm.taskframe.create() + br i1 %tobool.not, label %if.else.tf.tf.tf.tf, label %if.then.tf.tf.tf.tf + +if.then.tf.tf.tf.tf: ; preds = %pfor.body.entry + detach within %syncreg2, label %det.achd, label %det.cont unwind label %lpad4 + +det.achd: ; preds = %if.then.tf.tf.tf.tf + %call = invoke noundef i32 @_Z3bari(i32 noundef %__begin.0) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %det.achd + store i32 %call, ptr %w, align 4, !tbaa !5 + reattach within %syncreg2, label %det.cont + +det.cont: ; preds = %if.then.tf.tf.tf.tf, %invoke.cont + %add14 = add nuw nsw i32 %__begin.0, 1 + %call16 = invoke noundef i32 @_Z3bari(i32 noundef %add14) + to label %invoke.cont15 unwind label %lpad11.tfsplit.split-lp + +invoke.cont15: ; preds = %det.cont + sync within %syncreg2, label %sync.continue + +; CHECK: invoke.cont15: +; CHECK: call void @__csi_loopbody_exit( +; CHECK: call void @__csi_before_sync( +; CHECK-NEXT: sync within %syncreg2, label %sync.continue + 
+sync.continue: ; preds = %invoke.cont15 + invoke void @llvm.sync.unwind(token %syncreg2) + to label %invoke.cont17 unwind label %lpad11.tfsplit.split-lp + +; CHECK: sync.continue: +; CHECK-NOT: call +; CHECK-NEXT: invoke void @llvm.sync.unwind(token %syncreg2) +; CHECK-NEXT: to label %invoke.cont17 unwind label %[[CSI_LPAD_SPLIT:.+]] + +invoke.cont17: ; preds = %sync.continue + tail call void @llvm.taskframe.end(token %0) + br label %if.end + +; CHECK: invoke.cont17: +; CHECK-NEXT: call void @__csi_after_sync( +; CHECK-NEXT: call void @llvm.taskframe.end( + +lpad: ; preds = %det.achd + %1 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg2, { ptr, i32 } %1) + to label %unreachable unwind label %lpad4 + +lpad4: ; preds = %if.then.tf.tf.tf.tf, %lpad + %2 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +; CHECK: [[CSI_LPAD_SPLIT]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @__csi_after_sync( + +lpad11.tfsplit.split-lp: ; preds = %det.cont, %sync.continue + %lpad.tfsplit.split-lp100 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +lpad11: ; preds = %lpad11.tfsplit.split-lp, %lpad4 + %lpad.phi101 = phi { ptr, i32 } [ %2, %lpad4 ], [ %lpad.tfsplit.split-lp100, %lpad11.tfsplit.split-lp ] + invoke void @llvm.taskframe.resume.sl_p0i32s(token %0, { ptr, i32 } %lpad.phi101) + to label %unreachable unwind label %lpad23.tfsplit + +lpad23.tfsplit: ; preds = %lpad11 + %lpad.tfsplit = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23.tfsplit.split-lp.tfsplit: ; preds = %lpad48 + %lpad.tfsplit102 = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23.tfsplit.split-lp.tfsplit.split-lp: ; preds = %if.end + %lpad.tfsplit.split-lp103 = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23: ; preds = %lpad23.tfsplit.split-lp.tfsplit, %lpad23.tfsplit.split-lp.tfsplit.split-lp, %lpad23.tfsplit + %lpad.phi = phi { ptr, i32 } [ %lpad.tfsplit, %lpad23.tfsplit ], [ 
%lpad.tfsplit102, %lpad23.tfsplit.split-lp.tfsplit ], [ %lpad.tfsplit.split-lp103, %lpad23.tfsplit.split-lp.tfsplit.split-lp ] + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } %lpad.phi) + to label %unreachable unwind label %lpad69.loopexit + +if.else.tf.tf.tf.tf: ; preds = %pfor.body.entry + detach within %syncreg2, label %det.achd28, label %det.cont42 unwind label %lpad39 + +det.achd28: ; preds = %if.else.tf.tf.tf.tf + %add27 = or disjoint i32 %__begin.0, 1 + %call33 = invoke noundef i32 @_Z3bari(i32 noundef %add27) + to label %invoke.cont32 unwind label %lpad29 + +invoke.cont32: ; preds = %det.achd28 + store i32 %call33, ptr %w, align 4, !tbaa !5 + reattach within %syncreg2, label %det.cont42 + +det.cont42: ; preds = %if.else.tf.tf.tf.tf, %invoke.cont32 + %call52 = invoke noundef i32 @_Z3bari(i32 noundef %__begin.0) + to label %invoke.cont51 unwind label %lpad48.tfsplit.split-lp + +invoke.cont51: ; preds = %det.cont42 + sync within %syncreg2, label %sync.continue53 + +; CHECK: invoke.cont51: +; CHECK: call void @__csi_loopbody_exit( +; CHECK: call void @__csi_before_sync( +; CHECK-NEXT: sync within %syncreg2, label %sync.continue53 + +sync.continue53: ; preds = %invoke.cont51 + invoke void @llvm.sync.unwind(token %syncreg2) + to label %invoke.cont54 unwind label %lpad48.tfsplit.split-lp + +; CHECK: sync.continue53: +; CHECK-NOT: call +; CHECK-NEXT: invoke void @llvm.sync.unwind(token %syncreg2) +; CHECK-NEXT: to label %invoke.cont54 unwind label %[[CSI_LPAD_SPLIT2:.+]] + +invoke.cont54: ; preds = %sync.continue53 + tail call void @llvm.taskframe.end(token %0) + br label %if.end + +; CHECK: invoke.cont54: +; CHECK-NEXT: call void @__csi_after_sync( +; CHECK-NEXT: call void @llvm.taskframe.end( + +lpad29: ; preds = %det.achd28 + %3 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg2, { ptr, i32 } %3) + to label %unreachable unwind 
label %lpad39 + +lpad39: ; preds = %if.else.tf.tf.tf.tf, %lpad29 + %4 = landingpad { ptr, i32 } + cleanup + br label %lpad48 + +; CHECK: [[CSI_LPAD_SPLIT2]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @__csi_after_sync( + +lpad48.tfsplit.split-lp: ; preds = %det.cont42, %sync.continue53 + %lpad.tfsplit.split-lp = landingpad { ptr, i32 } + cleanup + br label %lpad48 + +lpad48: ; preds = %lpad48.tfsplit.split-lp, %lpad39 + %lpad.phi106 = phi { ptr, i32 } [ %4, %lpad39 ], [ %lpad.tfsplit.split-lp, %lpad48.tfsplit.split-lp ] + invoke void @llvm.taskframe.resume.sl_p0i32s(token %0, { ptr, i32 } %lpad.phi106) + to label %unreachable unwind label %lpad23.tfsplit.split-lp.tfsplit + +if.end: ; preds = %invoke.cont54, %invoke.cont17 + %w.0.load110 = load i32, ptr %w, align 4 + %call61 = invoke noundef i32 @_Z3bari(i32 noundef %w.0.load110) + to label %invoke.cont60 unwind label %lpad23.tfsplit.split-lp.tfsplit.split-lp + +invoke.cont60: ; preds = %if.end + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + reattach within %syncreg, label %pfor.inc + +pfor.inc: ; preds = %pfor.cond, %invoke.cont60 + %inc = add nuw nsw i32 %__begin.0, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %pfor.cond.cleanup, label %pfor.cond, !llvm.loop !9 + +pfor.cond.cleanup: ; preds = %pfor.inc + sync within %syncreg, label %sync.continue73 + +lpad69.loopexit: ; preds = %lpad23, %pfor.cond + %lpad.loopexit = landingpad { ptr, i32 } + catch ptr null + br label %lpad69 + +lpad69.loopexit.split-lp: ; preds = %sync.continue73 + %lpad.loopexit.split-lp = landingpad { ptr, i32 } + catch ptr null + br label %lpad69 + +lpad69: ; preds = %lpad69.loopexit.split-lp, %lpad69.loopexit + %lpad.phi109 = phi { ptr, i32 } [ %lpad.loopexit, %lpad69.loopexit ], [ %lpad.loopexit.split-lp, %lpad69.loopexit.split-lp ] + %5 = extractvalue { ptr, i32 } %lpad.phi109, 0 + %6 = tail call ptr @__cxa_begin_catch(ptr %5) #6 + %call83 = invoke noundef i32 @_Z3bari(i32 
noundef 0) + to label %invoke.cont82 unwind label %lpad81 + +sync.continue73: ; preds = %pfor.cond.cleanup + invoke void @llvm.sync.unwind(token %syncreg) + to label %try.cont unwind label %lpad69.loopexit.split-lp + +invoke.cont82: ; preds = %lpad69 + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %sync.continue73, %invoke.cont82 + ret void + +lpad81: ; preds = %lpad69 + %7 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad81 + resume { ptr, i32 } %7 + +terminate.lpad: ; preds = %lpad81 + %8 = landingpad { ptr, i32 } + catch ptr null + %9 = extractvalue { ptr, i32 } %8, 0 + tail call void @__clang_call_terminate(ptr %9) #7 + unreachable + +unreachable: ; preds = %lpad23, %lpad48, %lpad29, %lpad11, %lpad + unreachable +} + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #1 + +declare noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr #3 + +declare i32 @__gxx_personality_v0(...) 
+ +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.resume.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #4 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +declare ptr @__cxa_begin_catch(ptr) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +; Function Attrs: noinline noreturn nounwind uwtable +define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) local_unnamed_addr #5 comdat { + %2 = tail call ptr @__cxa_begin_catch(ptr %0) #6 + tail call void @_ZSt9terminatev() #7 + unreachable +} + +declare void @_ZSt9terminatev() local_unnamed_addr + +attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #3 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { mustprogress willreturn memory(argmem: readwrite) } +attributes #5 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" 
"tune-cpu"="generic" } +attributes #6 = { nounwind } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{!"clang version 19.1.7 (git@github.com:neboat/opencilk-project.git 8789ce788f0a6ecd35d9e9eef9e6652704d143d2)"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = distinct !{!9, !10, !11} +!10 = !{!"llvm.loop.mustprogress"} +!11 = !{!"tapir.loop.spawn.strategy", i32 1} diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll new file mode 100644 index 000000000000..188797afedb9 --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/csi-sync-unwind-loopexit.ll @@ -0,0 +1,235 @@ +; Check that CSI loop instrumentation instruments around sync-unwind loop exits properly. 
+; +; RUN: opt < %s -passes="csi-setup,csi" -S | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +$__clang_call_terminate = comdat any + +; Function Attrs: mustprogress uwtable +define dso_local void @_Z3fooi(i32 noundef %n) local_unnamed_addr #0 personality ptr @__gxx_personality_v0 { +entry: + %syncreg = tail call token @llvm.syncregion.start() + %cmp = icmp sgt i32 %n, 0 + br i1 %cmp, label %pfor.cond, label %try.cont + +pfor.cond: ; preds = %entry, %pfor.inc + %__begin.0 = phi i32 [ %inc, %pfor.inc ], [ 0, %entry ] + detach within %syncreg, label %pfor.body.entry, label %pfor.inc unwind label %lpad35.loopexit + +pfor.body.entry: ; preds = %pfor.cond + %w = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %w) + %0 = tail call token @llvm.taskframe.create() + %syncreg2 = tail call token @llvm.syncregion.start() + detach within %syncreg2, label %det.achd, label %det.cont unwind label %lpad4 + +det.achd: ; preds = %pfor.body.entry + %call = invoke noundef i32 @_Z3bari(i32 noundef %__begin.0) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %det.achd + store i32 %call, ptr %w, align 4, !tbaa !5 + reattach within %syncreg2, label %det.cont + +det.cont: ; preds = %pfor.body.entry, %invoke.cont + %add14 = add nuw nsw i32 %__begin.0, 1 + %call16 = invoke noundef i32 @_Z3bari(i32 noundef %add14) + to label %invoke.cont15 unwind label %lpad11.tfsplit.split-lp + +invoke.cont15: ; preds = %det.cont + sync within %syncreg2, label %sync.continue + +sync.continue: ; preds = %invoke.cont15 + invoke void @llvm.sync.unwind(token %syncreg2) + to label %invoke.cont17 unwind label %lpad11.tfsplit.split-lp + +; CHECK: invoke.cont15: +; CHECK: call void @__csi_bb_exit( +; CHECK: call void @__csi_loopbody_exit( +; CHECK: call void @__csi_before_sync( +; CHECK: sync within %syncreg2, label %sync.continue + +; CHECK: 
sync.continue: +; CHECK-NOT: call +; CHECK-NEXT: invoke void @llvm.sync.unwind(token %syncreg2) +; CHECK-NEXT: to label %invoke.cont17 unwind label %[[CSI_LPAD_SPLIT:.+]] + +invoke.cont17: ; preds = %sync.continue + tail call void @llvm.taskframe.end(token %0) + %w.0.load67 = load i32, ptr %w, align 4 + %call27 = invoke noundef i32 @_Z3bari(i32 noundef %w.0.load67) + to label %invoke.cont26 unwind label %lpad23.tfsplit.split-lp + +; CHECK: invoke.cont17: +; CHECK-NEXT: call void @__csi_after_sync( +; CHECK-NEXT: call void @llvm.taskframe.end( + +invoke.cont26: ; preds = %invoke.cont17 + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + reattach within %syncreg, label %pfor.inc + +pfor.inc: ; preds = %pfor.cond, %invoke.cont26 + %inc = add nuw nsw i32 %__begin.0, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %pfor.cond.cleanup, label %pfor.cond, !llvm.loop !9 + +pfor.cond.cleanup: ; preds = %pfor.inc + sync within %syncreg, label %sync.continue39 + +lpad: ; preds = %det.achd + %1 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg2, { ptr, i32 } %1) + to label %unreachable unwind label %lpad4 + +lpad4: ; preds = %pfor.body.entry, %lpad + %2 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +; CHECK: [[CSI_LPAD_SPLIT]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: call void @__csi_after_sync( + +lpad11.tfsplit.split-lp: ; preds = %det.cont, %sync.continue + %lpad.tfsplit.split-lp63 = landingpad { ptr, i32 } + cleanup + br label %lpad11 + +lpad11: ; preds = %lpad11.tfsplit.split-lp, %lpad4 + %lpad.phi64 = phi { ptr, i32 } [ %2, %lpad4 ], [ %lpad.tfsplit.split-lp63, %lpad11.tfsplit.split-lp ] + invoke void @llvm.taskframe.resume.sl_p0i32s(token %0, { ptr, i32 } %lpad.phi64) + to label %unreachable unwind label %lpad23.tfsplit + +lpad23.tfsplit: ; preds = %lpad11 + %lpad.tfsplit = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23.tfsplit.split-lp: ; preds 
= %invoke.cont17 + %lpad.tfsplit.split-lp = landingpad { ptr, i32 } + cleanup + br label %lpad23 + +lpad23: ; preds = %lpad23.tfsplit.split-lp, %lpad23.tfsplit + %lpad.phi = phi { ptr, i32 } [ %lpad.tfsplit, %lpad23.tfsplit ], [ %lpad.tfsplit.split-lp, %lpad23.tfsplit.split-lp ] + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %w) + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } %lpad.phi) + to label %unreachable unwind label %lpad35.loopexit + +lpad35.loopexit: ; preds = %lpad23, %pfor.cond + %lpad.loopexit = landingpad { ptr, i32 } + catch ptr null + br label %lpad35 + +lpad35.loopexit.split-lp: ; preds = %sync.continue39 + %lpad.loopexit.split-lp = landingpad { ptr, i32 } + catch ptr null + br label %lpad35 + +lpad35: ; preds = %lpad35.loopexit.split-lp, %lpad35.loopexit + %lpad.phi66 = phi { ptr, i32 } [ %lpad.loopexit, %lpad35.loopexit ], [ %lpad.loopexit.split-lp, %lpad35.loopexit.split-lp ] + %3 = extractvalue { ptr, i32 } %lpad.phi66, 0 + %4 = tail call ptr @__cxa_begin_catch(ptr %3) #6 + %call49 = invoke noundef i32 @_Z3bari(i32 noundef 0) + to label %invoke.cont48 unwind label %lpad47 + +sync.continue39: ; preds = %pfor.cond.cleanup + invoke void @llvm.sync.unwind(token %syncreg) + to label %try.cont unwind label %lpad35.loopexit.split-lp + +invoke.cont48: ; preds = %lpad35 + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: ; preds = %entry, %sync.continue39, %invoke.cont48 + ret void + +lpad47: ; preds = %lpad35 + %5 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: ; preds = %lpad47 + resume { ptr, i32 } %5 + +terminate.lpad: ; preds = %lpad47 + %6 = landingpad { ptr, i32 } + catch ptr null + %7 = extractvalue { ptr, i32 } %6, 0 + tail call void @__clang_call_terminate(ptr %7) #7 + unreachable + +unreachable: ; preds = %lpad23, %lpad11, %lpad + unreachable +} + +; Function Attrs: mustprogress nounwind willreturn 
memory(argmem: readwrite) +declare token @llvm.syncregion.start() #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.taskframe.create() #1 + +declare noundef i32 @_Z3bari(i32 noundef) local_unnamed_addr #3 + +declare i32 @__gxx_personality_v0(...) + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.resume.sl_p0i32s(token, { ptr, i32 }) #4 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #4 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare void @llvm.taskframe.end(token) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +declare ptr @__cxa_begin_catch(ptr) local_unnamed_addr + +declare void @__cxa_end_catch() local_unnamed_addr + +; Function Attrs: noinline noreturn nounwind uwtable +define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) local_unnamed_addr #5 comdat { + %2 = tail call ptr @__cxa_begin_catch(ptr %0) #6 + tail call void @_ZSt9terminatev() #7 + unreachable +} + +declare void @_ZSt9terminatev() local_unnamed_addr + +attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn 
memory(argmem: readwrite) } +attributes #3 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { mustprogress willreturn memory(argmem: readwrite) } +attributes #5 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { nounwind } +attributes #7 = { noreturn nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3} +!llvm.ident = !{!4} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 2} +!4 = !{!"clang version 19.1.7 (git@github.com:neboat/opencilk-project.git 8789ce788f0a6ecd35d9e9eef9e6652704d143d2)"} +!5 = !{!6, !6, i64 0} +!6 = !{!"int", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C++ TBAA"} +!9 = distinct !{!9, !10, !11, !12} +!10 = !{!"llvm.loop.mustprogress"} +!11 = !{!"tapir.loop.spawn.strategy", i32 1} +!12 = !{!"llvm.loop.unroll.disable"} diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll index a83b48741a80..9a4e5343fa8b 100644 --- a/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/split-unreachable-predecessors.ll @@ -116,9 +116,6 @@ cleanup.cont: ; preds = %cleanup, %cleanup unreachable: ; preds = %cleanup, %lpad unreachable -; CHECK: [[DR_UNREACHABLE]]: -; CHECK-NEXT: unreachable - ; CHECK: unreachable: ; CHECK: unreachable } diff --git a/llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll b/llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll new file mode 100644 index 000000000000..2825387ecf92 --- /dev/null +++ 
b/llvm/test/Transforms/Tapir/mixed-predecessors-of-unreachable.ll @@ -0,0 +1,91 @@ +; Check that loop simplification does not split placeholder successors of +; detached.rethrows when those unreachable blocks have other predecessors. +; +; RUN: opt < %s -passes="cilksan" -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128-Fn32" +target triple = "arm64-apple-macosx13.0.0" + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #1 + +; Function Attrs: sanitize_cilk +define void @_ZN9LAMMPS_NS9StencilMD31SORT_LOCAL_ATOMS_ZOID_MANY_CUTSEv() #2 personality ptr null { +entry: + %syncreg = call token @llvm.syncregion.start() + br label %pfor.detach + +pfor.detach: ; preds = %pfor.detach, %entry + detach within %syncreg, label %pfor.body.entry, label %pfor.detach unwind label %lpad635 + +pfor.body.entry: ; preds = %pfor.detach + %syncreg51 = call token @llvm.syncregion.start() + br label %pfor.detach62 + +pfor.detach62: ; preds = %pfor.detach62, %pfor.body.entry + detach within %syncreg51, label %pfor.body.entry64, label %pfor.detach62 unwind label %lpad109 + +pfor.body.entry64: ; preds = %pfor.detach62 + br label %for.cond + +for.cond: ; preds = %for.cond, %pfor.body.entry64 + br label %for.cond + +lpad109: ; preds = %pfor.detach62 + %0 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } zeroinitializer) + to label %unreachable unwind label %lpad635 + +; CHECK: lpad109: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, +; CHECK-NEXT: to label %unreachable unwind label %lpad635 + +pfor.body.entry140: ; No predecessors! 
+ %syncreg143 = call token @llvm.syncregion.start() + br label %pfor.detach157 + +pfor.detach157: ; preds = %pfor.preattach289, %pfor.detach157, %pfor.body.entry140 + detach within %syncreg143, label %pfor.body.entry159, label %pfor.detach157 unwind label %lpad295 + +pfor.body.entry159: ; preds = %pfor.detach157 + switch i32 0, label %unreachable [ + i32 0, label %pfor.preattach289 + i32 1, label %pfor.preattach289 + ] + +pfor.preattach289: ; preds = %pfor.body.entry159, %pfor.body.entry159 + reattach within %syncreg143, label %pfor.detach157 + +lpad295: ; preds = %pfor.detach157 + %1 = landingpad { ptr, i32 } + cleanup + invoke void @llvm.detached.rethrow.sl_p0i32s(token none, { ptr, i32 } zeroinitializer) + to label %unreachable unwind label %lpad635 + +; CHECK: lpad295: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: invoke void @llvm.detached.rethrow.sl_p0i32s(token none, +; CHECK-NEXT: to label %unreachable unwind label %lpad635 + +lpad635: ; preds = %lpad295, %lpad109, %pfor.detach + %2 = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } zeroinitializer + +unreachable: ; preds = %lpad295, %pfor.body.entry159, %lpad109 + unreachable +} + +; uselistorder directives +uselistorder ptr null, { 1, 2, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } +attributes #1 = { willreturn memory(argmem: readwrite) } +attributes #2 = { sanitize_cilk }