Add 3 LLVM patches to fix issue #19792

miscompilation of broadcast
JuliaLang · Jan 2, 2017 · b4939da · b4939da
1 parent c38a5a3
commit b4939da
Show file tree

Hide file tree

Showing 4 changed files with 241 additions and 3 deletions.
diff --git a/deps/llvm.mk b/deps/llvm.mk
@@ -489,17 +489,20 @@ $(eval $(call LLVM_PATCH,llvm-3.9.0_cygwin)) # R283427, Remove for 4.0
 endif
 $(eval $(call LLVM_PATCH,llvm-PR22923)) # Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-arm-fix-prel31))
-$(eval $(call LLVM_PATCH,llvm-D25865-cmakeshlib))
+$(eval $(call LLVM_PATCH,llvm-D25865-cmakeshlib)) # Remove for 4.0
 # Cygwin and openSUSE still use win32-threads mingw, https://llvm.org/bugs/show_bug.cgi?id=26365
 $(eval $(call LLVM_PATCH,llvm-3.9.0_threads))
-$(eval $(call LLVM_PATCH,llvm-3.9.0_win64-reloc-dwarf))
+$(eval $(call LLVM_PATCH,llvm-3.9.0_win64-reloc-dwarf)) # modified version applied as R290809, Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-3.9.0_D27296-libssp))
-$(eval $(call LLVM_PATCH,llvm-D27609-AArch64-UABS_G3))
+$(eval $(call LLVM_PATCH,llvm-D27609-AArch64-UABS_G3)) # Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model))
 # patches for NVPTX
 $(eval $(call LLVM_PATCH,llvm-D9168_argument_alignment)) # Remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-D23597_sdag_names)) # Dep for D24300, remove for 4.0
 $(eval $(call LLVM_PATCH,llvm-D24300_ptx_intrinsics)) # Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-D27389)) # Julia issue #19792, Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-D27397)) # Julia issue #19792, Remove for 4.0
+$(eval $(call LLVM_PATCH,llvm-D28009)) # Julia issue #19792, Remove for 4.0
 endif # LLVM_VER
 
 ifeq ($(LLVM_VER),3.7.1)

diff --git a/deps/patches/llvm-D27389.patch b/deps/patches/llvm-D27389.patch
@@ -0,0 +1,66 @@
+commit 83dc06334ff95ad18a951d0bb540290510f2f81a
+Author: Keno Fischer <[email protected]>
+Date:   Thu Dec 8 17:22:35 2016 +0000
+
+    ConstantFolding: Don't crash when encountering vector GEP
+
+    ConstantFolding tried to cast one of the scalar indices to a vector
+    type. Instead, use the vector type only for the first index (which
+    is the only one allowed to be a vector) and use its scalar type
+    otherwise.
+
+    Fixes PR31250.
+
+    Reviewers: majnemer
+    Differential Revision: https://reviews.llvm.org/D27389
+
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@289073 91177308-0d34-0410-b5e6-96231b3b80d8
+
+diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
+index 2d1edfe..1c0bf01a 100644
+--- a/lib/Analysis/ConstantFolding.cpp
++++ b/lib/Analysis/ConstantFolding.cpp
+@@ -734,14 +734,15 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
+                          Type *ResultTy, Optional<unsigned> InRangeIndex,
+                          const DataLayout &DL, const TargetLibraryInfo *TLI) {
+   Type *IntPtrTy = DL.getIntPtrType(ResultTy);
++  Type *IntPtrScalarTy = IntPtrTy->getScalarType();
+
+   bool Any = false;
+   SmallVector<Constant*, 32> NewIdxs;
+   for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
+     if ((i == 1 ||
+-         !isa<StructType>(GetElementPtrInst::getIndexedType(SrcElemTy,
+-             Ops.slice(1, i - 1)))) &&
+-        Ops[i]->getType() != IntPtrTy) {
++         !isa<StructType>(GetElementPtrInst::getIndexedType(
++             SrcElemTy, Ops.slice(1, i - 1)))) &&
++        Ops[i]->getType() != (i == 1 ? IntPtrTy : IntPtrScalarTy)) {
+       Any = true;
+       NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+                                                                       true,
+diff --git a/test/Analysis/ConstantFolding/vectorgep-crash.ll b/test/Analysis/ConstantFolding/vectorgep-crash.ll
+new file mode 100644
+index 0000000..bcc96b2
+--- /dev/null
++++ b/test/Analysis/ConstantFolding/vectorgep-crash.ll
+@@ -0,0 +1,19 @@
++; RUN: opt -instcombine -S -o - %s | FileCheck %s
++; Tests that we don't crash upon encountering a vector GEP
++
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
++target triple = "x86_64-unknown-linux-gnu"
++
++%Dual = type { %Dual.72, %Partials.73 }
++%Dual.72 = type { double, %Partials }
++%Partials = type { [2 x double] }
++%Partials.73 = type { [2 x %Dual.72] }
++
++; Function Attrs: sspreq
++define <8 x i64*> @"julia_axpy!_65480"(%Dual* %arg1, <8 x i64> %arg2) {
++top:
++; CHECK: %VectorGep14 = getelementptr inbounds %Dual, %Dual* %arg1, <8 x i64> %arg2, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0
++  %VectorGep14 = getelementptr inbounds %Dual, %Dual* %arg1, <8 x i64> %arg2, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0
++  %0 = bitcast <8 x double*> %VectorGep14 to <8 x i64*>
++  ret <8 x i64*> %0
++}
diff --git a/deps/patches/llvm-D27397.patch b/deps/patches/llvm-D27397.patch
@@ -0,0 +1,101 @@
+commit 99ca52276f9ee1386866d6dff6179cfa64824621
+Author: Keno Fischer <[email protected]>
+Date:   Mon Dec 5 21:25:03 2016 +0000
+
+    [LAA] Prevent invalid IR for loop-invariant bound in loop body
+
+    Summary:
+    If LAA expands a bound that is loop invariant, but not hoisted out
+    of the loop body, it used to use that value anyway, causing a
+    non-domination error, because the memcheck block is of course not
+    dominated by the scalar loop body. Detect this situation and expand
+    the SCEV expression instead.
+
+    Fixes PR31251
+
+    Reviewers: anemet
+    Subscribers: mzolotukhin, llvm-commits
+
+    Differential Revision: https://reviews.llvm.org/D27397
+
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@288705 91177308-0d34-0410-b5e6-96231b3b80d8
+
+diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
+index 01a2f46..2f3dca3 100644
+--- a/lib/Analysis/LoopAccessAnalysis.cpp
++++ b/lib/Analysis/LoopAccessAnalysis.cpp
+@@ -1870,18 +1870,24 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
+   Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
+   const SCEV *Sc = SE->getSCEV(Ptr);
+
++  unsigned AS = Ptr->getType()->getPointerAddressSpace();
++  LLVMContext &Ctx = Loc->getContext();
++
++  // Use this type for pointer arithmetic.
++  Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
++
+   if (SE->isLoopInvariant(Sc, TheLoop)) {
+     DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
+                  << "\n");
+-    return {Ptr, Ptr};
++    // Ptr could be in the loop body. If so, expand a new one at the correct
++    // location.
++    Instruction *Inst = dyn_cast<Instruction>(Ptr);
++    Value *NewPtr = (Inst && TheLoop->contains(Inst))
++                        ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
++                        : Ptr;
++    return {NewPtr, NewPtr};
+   } else {
+-    unsigned AS = Ptr->getType()->getPointerAddressSpace();
+-    LLVMContext &Ctx = Loc->getContext();
+-
+-    // Use this type for pointer arithmetic.
+-    Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+     Value *Start = nullptr, *End = nullptr;
+-
+     DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
+     Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
+     End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
+diff --git a/test/Transforms/LoopVersioning/loop-invariant-bound.ll b/test/Transforms/LoopVersioning/loop-invariant-bound.ll
+new file mode 100644
+index 0000000..3411adb
+--- /dev/null
++++ b/test/Transforms/LoopVersioning/loop-invariant-bound.ll
+@@ -0,0 +1,37 @@
++; RUN: opt -loop-versioning -S < %s | FileCheck %s
++; Checks that when introducing checks, we don't accidentally introduce non-dominating instructions
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
++
++%Dual.212 = type { %Dual.213, %Partials.215 }
++%Dual.213 = type { double, %Partials.214 }
++%Partials.214 = type { [2 x double] }
++%Partials.215 = type { [2 x %Dual.213] }
++
++; Function Attrs: sspreq
++define void @"julia_axpy!_65480"(%Dual.212*) {
++top:
++  br label %if24
++
++; CHECK-NOT: %bc = bitcast i64* %v2.sroa.0.0..sroa_cast
++; CHECK: %bound0
++
++if24:                                             ; preds = %if24, %top
++  %"#temp#1.sroa.3.02" = phi i64 [ undef, %top ], [ %2, %if24 ]
++  %"#temp#1.sroa.0.01" = phi i64 [ undef, %top ], [ %1, %if24 ]
++  %1 = add i64 %"#temp#1.sroa.0.01", 1
++  %2 = add i64 %"#temp#1.sroa.3.02", 1
++  ; This pointer is loop invariant. LAA used to re-use it from memcheck, even though it didn't dominate.
++  %v2.sroa.0.0..sroa_cast = bitcast %Dual.212* %0 to i64*
++  %v2.sroa.0.0.copyload = load i64, i64* %v2.sroa.0.0..sroa_cast, align 1
++  %3 = add i64 %"#temp#1.sroa.0.01", -1
++  %4 = getelementptr inbounds %Dual.212, %Dual.212* undef, i64 %3, i32 1, i32 0, i64 0, i32 1, i32 0, i64 0
++  %5 = bitcast double* %4 to i64*
++  store i64 undef, i64* %5, align 8
++  %notlhs27 = icmp eq i64 %2, undef
++  %notrhs28 = icmp eq i64 %1, undef
++  %6 = or i1 %notrhs28, %notlhs27
++  br i1 %6, label %L41.L335_crit_edge, label %if24
++
++L41.L335_crit_edge:                               ; preds = %if24
++  ret void
++}
diff --git a/deps/patches/llvm-D28009.patch b/deps/patches/llvm-D28009.patch
@@ -0,0 +1,68 @@
+commit 57ab82784ddb8d21eb0041d52f8490d8fd404e29
+Author: Michael Kuperstein <[email protected]>
+Date:   Wed Dec 21 17:34:21 2016 +0000
+
+    [ConstantFolding] Fix vector GEPs harder
+
+    For vector GEPs, CastGEPIndices can end up in an infinite recursion, because
+    we compare the vector type to the scalar pointer type, find them different,
+    and then try to cast a type to itself.
+
+    Differential Revision: https://reviews.llvm.org/D28009
+
+
+    git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290260 91177308-0d34-0410-b5e6-96231b3b80d8
+
+diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
+index cf0d5e4..9e521e1 100644
+--- a/lib/Analysis/ConstantFolding.cpp
++++ b/lib/Analysis/ConstantFolding.cpp
+@@ -742,13 +742,16 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
+     if ((i == 1 ||
+          !isa<StructType>(GetElementPtrInst::getIndexedType(
+              SrcElemTy, Ops.slice(1, i - 1)))) &&
+-        Ops[i]->getType() != (i == 1 ? IntPtrTy : IntPtrScalarTy)) {
++        Ops[i]->getType()->getScalarType() != IntPtrScalarTy) {
+       Any = true;
++      Type *NewType = Ops[i]->getType()->isVectorTy()
++                          ? IntPtrTy
++                          : IntPtrTy->getScalarType();
+       NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
+                                                                       true,
+-                                                                      IntPtrTy,
++                                                                      NewType,
+                                                                       true),
+-                                              Ops[i], IntPtrTy));
++                                              Ops[i], NewType));
+     } else
+       NewIdxs.push_back(Ops[i]);
+   }
+diff --git a/test/Analysis/ConstantFolding/vectorgep-crash.ll b/test/Analysis/ConstantFolding/vectorgep-crash.ll
+index bcc96b2..e7a5117 100644
+--- a/test/Analysis/ConstantFolding/vectorgep-crash.ll
++++ b/test/Analysis/ConstantFolding/vectorgep-crash.ll
+@@ -17,3 +17,24 @@ top:
+   %0 = bitcast <8 x double*> %VectorGep14 to <8 x i64*>
+   ret <8 x i64*> %0
+ }
++
++%struct.A = type { i32, %struct.B* }
++%struct.B = type { i64, %struct.C* }
++%struct.C = type { i64 }
++
++@G = internal global [65 x %struct.A] zeroinitializer, align 16
++; CHECK-LABEL: @test
++; CHECK: ret <16 x i32*> getelementptr ([65 x %struct.A], [65 x %struct.A]* @G, <16 x i64> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16>, <16 x i32> zeroinitializer)
++define <16 x i32*> @test() {
++vector.body:
++  %VectorGep = getelementptr [65 x %struct.A], [65 x %struct.A]* @G, <16 x i64> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16>, <16 x i32> zeroinitializer
++  ret <16 x i32*> %VectorGep
++}
++
++; CHECK-LABEL: @test2
++; CHECK: ret <16 x i32*> getelementptr ([65 x %struct.A], [65 x %struct.A]* @G, <16 x i64> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16>, <16 x i32> zeroinitializer)
++define <16 x i32*> @test2() {
++vector.body:
++  %VectorGep = getelementptr [65 x %struct.A], [65 x %struct.A]* @G, <16 x i32> zeroinitializer, <16 x i64> <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16>, <16 x i32> zeroinitializer
++  ret <16 x i32*> %VectorGep
++}