Skip to content

Commit da71949

Browse files
committed
[AArch64] Fix op mask detection in performZExtDeinterleaveShuffleCombine
Given a zext from an extract vector with a shuffle mask like <4, 0, 0, 4>, we would previously recognize the top half as a deinterleave. To convert this into a uzp, we should also have been checking that the bottom half is undef. Fixes llvm#125989.
1 parent 73137e4 commit da71949

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+3
Original file line number | Diff line number | Diff line change
@@ -22332,6 +22332,9 @@ static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N,
2233222332
if (!IsDeInterleave)
2233322333
IsUndefDeInterleave =
2233422334
Shuffle->getOperand(1).isUndef() &&
22335+
all_of(
22336+
Shuffle->getMask().slice(ExtOffset, VT.getVectorNumElements() / 2),
22337+
[](int M) { return M < 0; }) &&
2233522338
ShuffleVectorInst::isDeInterleaveMaskOfFactor(
2233622339
Shuffle->getMask().slice(ExtOffset + VT.getVectorNumElements() / 2,
2233722340
VT.getVectorNumElements() / 2),

llvm/test/CodeGen/AArch64/zext-shuffle.ll

+10-9
Original file line number | Diff line number | Diff line change
@@ -646,9 +646,10 @@ define <4 x i32> @isUndefDeInterleave_t3(<8 x i16> %a) {
646646
define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) {
647647
; CHECK-LABEL: isUndefDeInterleave_b0_bad:
648648
; CHECK: // %bb.0:
649-
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
650-
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
651-
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
649+
; CHECK-NEXT: adrp x8, .LCPI40_0
650+
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
651+
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
652+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
652653
; CHECK-NEXT: ret
653654
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 4, i32 4, i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
654655
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
@@ -659,8 +660,10 @@ define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) {
659660
define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) {
660661
; CHECK-LABEL: isUndefDeInterleave_t1_bad:
661662
; CHECK: // %bb.0:
662-
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
663-
; CHECK-NEXT: ushr v0.4s, v0.4s, #16
663+
; CHECK-NEXT: adrp x8, .LCPI41_0
664+
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI41_0]
665+
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
666+
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
664667
; CHECK-NEXT: ret
665668
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 4, i32 1, i32 5>
666669
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
@@ -671,10 +674,8 @@ define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) {
671674
define i16 @undeftop(<8 x i16> %0) {
672675
; CHECK-LABEL: undeftop:
673676
; CHECK: // %bb.0:
674-
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
675-
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
676-
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
677-
; CHECK-NEXT: add v0.4s, v0.4s, v0.4s
677+
; CHECK-NEXT: dup v0.8h, v0.h[4]
678+
; CHECK-NEXT: uaddl v0.4s, v0.4h, v0.4h
678679
; CHECK-NEXT: xtn v0.4h, v0.4s
679680
; CHECK-NEXT: umov w0, v0.h[0]
680681
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)