Skip to content

Commit c7f18ba

Browse files
davemgreen authored and tstellar committed
[AArch64] Fix op mask detection in performZExtDeinterleaveShuffleCombine (llvm#126054)
Given a zext from an extract vector, with a shuffle mask like <4, 0, 0, 4> we would previously recognize the top half as a deinterleave. In order to convert into a uzp we should have been checking that the bottom half is also poison. Fixes llvm#125989 (cherry picked from commit 2c43479)
1 parent f0f59e3 commit c7f18ba

File tree

2 files changed

+146
-0
lines changed

2 files changed

+146
-0
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

+3
Original file line number | Diff line number | Diff line change
@@ -22364,6 +22364,9 @@ static SDValue performZExtDeinterleaveShuffleCombine(SDNode *N,
2236422364
if (!IsDeInterleave)
2236522365
IsUndefDeInterleave =
2236622366
Shuffle->getOperand(1).isUndef() &&
22367+
all_of(
22368+
Shuffle->getMask().slice(ExtOffset, VT.getVectorNumElements() / 2),
22369+
[](int M) { return M < 0; }) &&
2236722370
ShuffleVectorInst::isDeInterleaveMaskOfFactor(
2236822371
Shuffle->getMask().slice(ExtOffset + VT.getVectorNumElements() / 2,
2236922372
VT.getVectorNumElements() / 2),

llvm/test/CodeGen/AArch64/zext-shuffle.ll

+143
Original file line number | Diff line number | Diff line change
@@ -543,3 +543,146 @@ define <8 x double> @uitofp_load_fadd(ptr %p) {
543543
ret <8 x double> %c
544544
}
545545

546+
define <4 x i32> @isUndefDeInterleave_b0(<8 x i16> %a, <8 x i16> %b) {
547+
; CHECK-LABEL: isUndefDeInterleave_b0:
548+
; CHECK: // %bb.0:
549+
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
550+
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
551+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
552+
; CHECK-NEXT: ret
553+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
554+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
555+
%3 = zext <4 x i16> %s2 to <4 x i32>
556+
ret <4 x i32> %3
557+
}
558+
559+
define <4 x i32> @isUndefDeInterleave_b1(<8 x i16> %a) {
560+
; CHECK-LABEL: isUndefDeInterleave_b1:
561+
; CHECK: // %bb.0:
562+
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
563+
; CHECK-NEXT: ushr v0.4s, v0.4s, #16
564+
; CHECK-NEXT: ret
565+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 1, i32 5, i32 poison, i32 poison, i32 poison, i32 poison>
566+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
567+
%3 = zext <4 x i16> %s2 to <4 x i32>
568+
ret <4 x i32> %3
569+
}
570+
571+
define <4 x i32> @isUndefDeInterleave_b2(<8 x i16> %a) {
572+
; CHECK-LABEL: isUndefDeInterleave_b2:
573+
; CHECK: // %bb.0:
574+
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
575+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
576+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
577+
; CHECK-NEXT: ret
578+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 2, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
579+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
580+
%3 = zext <4 x i16> %s2 to <4 x i32>
581+
ret <4 x i32> %3
582+
}
583+
584+
define <4 x i32> @isUndefDeInterleave_b3(<8 x i16> %a) {
585+
; CHECK-LABEL: isUndefDeInterleave_b3:
586+
; CHECK: // %bb.0:
587+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
588+
; CHECK-NEXT: ushr v0.4s, v0.4s, #16
589+
; CHECK-NEXT: ret
590+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 3, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
591+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
592+
%3 = zext <4 x i16> %s2 to <4 x i32>
593+
ret <4 x i32> %3
594+
}
595+
596+
define <4 x i32> @isUndefDeInterleave_t0(<8 x i16> %a, <8 x i16> %b) {
597+
; CHECK-LABEL: isUndefDeInterleave_t0:
598+
; CHECK: // %bb.0:
599+
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
600+
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
601+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
602+
; CHECK-NEXT: ret
603+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 0, i32 4>
604+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
605+
%3 = zext <4 x i16> %s2 to <4 x i32>
606+
ret <4 x i32> %3
607+
}
608+
609+
define <4 x i32> @isUndefDeInterleave_t1(<8 x i16> %a) {
610+
; CHECK-LABEL: isUndefDeInterleave_t1:
611+
; CHECK: // %bb.0:
612+
; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s
613+
; CHECK-NEXT: ushr v0.4s, v0.4s, #16
614+
; CHECK-NEXT: ret
615+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 1, i32 5>
616+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
617+
%3 = zext <4 x i16> %s2 to <4 x i32>
618+
ret <4 x i32> %3
619+
}
620+
621+
define <4 x i32> @isUndefDeInterleave_t2(<8 x i16> %a) {
622+
; CHECK-LABEL: isUndefDeInterleave_t2:
623+
; CHECK: // %bb.0:
624+
; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
625+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
626+
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
627+
; CHECK-NEXT: ret
628+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 2, i32 6>
629+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
630+
%3 = zext <4 x i16> %s2 to <4 x i32>
631+
ret <4 x i32> %3
632+
}
633+
634+
define <4 x i32> @isUndefDeInterleave_t3(<8 x i16> %a) {
635+
; CHECK-LABEL: isUndefDeInterleave_t3:
636+
; CHECK: // %bb.0:
637+
; CHECK-NEXT: uzp2 v0.4s, v0.4s, v0.4s
638+
; CHECK-NEXT: ushr v0.4s, v0.4s, #16
639+
; CHECK-NEXT: ret
640+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 3, i32 7>
641+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
642+
%3 = zext <4 x i16> %s2 to <4 x i32>
643+
ret <4 x i32> %3
644+
}
645+
646+
define <4 x i32> @isUndefDeInterleave_b0_bad(<8 x i16> %a, <8 x i16> %b) {
647+
; CHECK-LABEL: isUndefDeInterleave_b0_bad:
648+
; CHECK: // %bb.0:
649+
; CHECK-NEXT: adrp x8, .LCPI40_0
650+
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI40_0]
651+
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
652+
; CHECK-NEXT: ushll v0.4s, v0.4h, #0
653+
; CHECK-NEXT: ret
654+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 4, i32 4, i32 0, i32 4, i32 poison, i32 poison, i32 poison, i32 poison>
655+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
656+
%3 = zext <4 x i16> %s2 to <4 x i32>
657+
ret <4 x i32> %3
658+
}
659+
660+
define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) {
661+
; CHECK-LABEL: isUndefDeInterleave_t1_bad:
662+
; CHECK: // %bb.0:
663+
; CHECK-NEXT: adrp x8, .LCPI41_0
664+
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI41_0]
665+
; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b
666+
; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
667+
; CHECK-NEXT: ret
668+
%2 = shufflevector <8 x i16> %a, <8 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 4, i32 4, i32 1, i32 5>
669+
%s2 = shufflevector <8 x i16> %2, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
670+
%3 = zext <4 x i16> %s2 to <4 x i32>
671+
ret <4 x i32> %3
672+
}
673+
674+
define i16 @undeftop(<8 x i16> %0) {
675+
; CHECK-LABEL: undeftop:
676+
; CHECK: // %bb.0:
677+
; CHECK-NEXT: dup v0.8h, v0.h[4]
678+
; CHECK-NEXT: uaddl v0.4s, v0.4h, v0.4h
679+
; CHECK-NEXT: xtn v0.4h, v0.4s
680+
; CHECK-NEXT: umov w0, v0.h[0]
681+
; CHECK-NEXT: ret
682+
%2 = shufflevector <8 x i16> %0, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 9, i32 7, i32 5, i32 3>
683+
%3 = zext <8 x i16> %2 to <8 x i64>
684+
%new0 = add <8 x i64> %3, %3
685+
%last = trunc <8 x i64> %new0 to <8 x i16>
686+
%4 = extractelement <8 x i16> %last, i32 0
687+
ret i16 %4
688+
}

0 commit comments

Comments
 (0)