DAG: Fix vector_shuffle -> splat fold defining undef lanes #123596

Merged
10 changes: 9 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -26386,9 +26386,17 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  if (AllSame)
    return N0;

  // Canonicalize any other splat as a build_vector.
  // Canonicalize any other splat as a build_vector, but avoid defining any
  // undefined elements in the mask.
  SDValue Splatted = V->getOperand(SplatIndex);
  SmallVector<SDValue, 8> Ops(NumElts, Splatted);
  EVT EltVT = Splatted.getValueType();

  for (unsigned i = 0; i != NumElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      Ops[i] = DAG.getUNDEF(EltVT);
  }

  SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);

  // We may have jumped through bitcasts, so the type of the
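
For readers skimming the combine, here is a minimal standalone sketch of the new behaviour, written in plain C++ rather than the SelectionDAG API (the helper name canonicalizeSplat and the sample mask are illustrative, not from the patch): the splatted value fills only the lanes whose shuffle-mask index is defined, while lanes with a negative (undef) mask index stay undefined instead of being widened to the splatted value.

#include <cstdio>
#include <optional>
#include <vector>

// A lane is either a concrete value or undef (no value).
using Elt = std::optional<int>;

// Model of the splat -> build_vector canonicalization: defined lanes take
// the splatted value, lanes with a negative mask index are left undef.
static std::vector<Elt> canonicalizeSplat(const std::vector<int> &Mask,
                                          int Splatted) {
  std::vector<Elt> Ops(Mask.size(), Elt{Splatted});
  for (size_t I = 0; I != Mask.size(); ++I)
    if (Mask[I] < 0)
      Ops[I] = std::nullopt; // keep the undef lane undefined
  return Ops;
}

int main() {
  // Mask <1, -1, 1, -1>: lanes 0 and 2 splat element 1, lanes 1 and 3 are undef.
  for (const Elt &E : canonicalizeSplat({1, -1, 1, -1}, 33))
    E ? std::printf("%d ", *E) : std::printf("undef ");
  std::printf("\n"); // prints: 33 undef 33 undef
  return 0;
}

In the actual patch the same per-lane decision is made with SVN->getMaskElt(i) < 0 and DAG.getUNDEF(EltVT), which is what lets the backends drop stores and shuffles of undef lanes in the regenerated tests below.
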
787 changes: 500 additions & 287 deletions llvm/test/CodeGen/AMDGPU/shufflevector.v4i64.v3i64.ll

Large diffs are not rendered by default.

787 changes: 500 additions & 287 deletions llvm/test/CodeGen/AMDGPU/shufflevector.v4p0.v3p0.ll

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions llvm/test/CodeGen/PowerPC/vector-reduce-fadd.ll
@@ -3628,31 +3628,31 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
; PWR9LE-LABEL: v2ppcf128_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: mflr r0
; PWR9LE-NEXT: stdu r1, -64(r1)
; PWR9LE-NEXT: std r0, 80(r1)
; PWR9LE-NEXT: stdu r1, -48(r1)
; PWR9LE-NEXT: std r0, 64(r1)
; PWR9LE-NEXT: bl __gcc_qadd
; PWR9LE-NEXT: nop
; PWR9LE-NEXT: stfd f2, 40(r1)
; PWR9LE-NEXT: stfd f1, 32(r1)
; PWR9LE-NEXT: lxv vs1, 32(r1)
; PWR9LE-NEXT: xxswapd vs2, vs1
; PWR9LE-NEXT: addi r1, r1, 64
; PWR9LE-NEXT: addi r1, r1, 48
; PWR9LE-NEXT: ld r0, 16(r1)
; PWR9LE-NEXT: mtlr r0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2ppcf128_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: mflr r0
; PWR9BE-NEXT: stdu r1, -144(r1)
; PWR9BE-NEXT: std r0, 160(r1)
; PWR9BE-NEXT: stdu r1, -128(r1)
; PWR9BE-NEXT: std r0, 144(r1)
; PWR9BE-NEXT: bl __gcc_qadd
; PWR9BE-NEXT: nop
; PWR9BE-NEXT: stfd f2, 120(r1)
; PWR9BE-NEXT: stfd f1, 112(r1)
; PWR9BE-NEXT: lxv vs1, 112(r1)
; PWR9BE-NEXT: xxswapd vs2, vs1
; PWR9BE-NEXT: addi r1, r1, 144
; PWR9BE-NEXT: addi r1, r1, 128
; PWR9BE-NEXT: ld r0, 16(r1)
; PWR9BE-NEXT: mtlr r0
; PWR9BE-NEXT: blr
@@ -3661,13 +3661,13 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: mflr r0
; PWR10LE-NEXT: std r0, 16(r1)
; PWR10LE-NEXT: stdu r1, -64(r1)
; PWR10LE-NEXT: stdu r1, -48(r1)
; PWR10LE-NEXT: bl __gcc_qadd@notoc
; PWR10LE-NEXT: stfd f2, 40(r1)
; PWR10LE-NEXT: stfd f1, 32(r1)
; PWR10LE-NEXT: lxv vs1, 32(r1)
; PWR10LE-NEXT: xxswapd vs2, vs1
; PWR10LE-NEXT: addi r1, r1, 64
; PWR10LE-NEXT: addi r1, r1, 48
; PWR10LE-NEXT: ld r0, 16(r1)
; PWR10LE-NEXT: mtlr r0
; PWR10LE-NEXT: blr
@@ -3676,14 +3676,14 @@ define dso_local ppc_fp128 @v2ppcf128_fast(<2 x ppc_fp128> %a) local_unnamed_add
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: mflr r0
; PWR10BE-NEXT: std r0, 16(r1)
; PWR10BE-NEXT: stdu r1, -144(r1)
; PWR10BE-NEXT: stdu r1, -128(r1)
; PWR10BE-NEXT: bl __gcc_qadd
; PWR10BE-NEXT: nop
; PWR10BE-NEXT: stfd f2, 120(r1)
; PWR10BE-NEXT: stfd f1, 112(r1)
; PWR10BE-NEXT: lxv vs1, 112(r1)
; PWR10BE-NEXT: xxswapd vs2, vs1
; PWR10BE-NEXT: addi r1, r1, 144
; PWR10BE-NEXT: addi r1, r1, 128
; PWR10BE-NEXT: ld r0, 16(r1)
; PWR10BE-NEXT: mtlr r0
; PWR10BE-NEXT: blr
30 changes: 0 additions & 30 deletions llvm/test/CodeGen/WebAssembly/simd.ll
@@ -481,21 +481,6 @@ define <16 x i8> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v16i8:
; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.store8 15($0), $2
; NO-SIMD128-NEXT: i32.store8 14($0), $2
; NO-SIMD128-NEXT: i32.store8 13($0), $2
; NO-SIMD128-NEXT: i32.store8 12($0), $2
; NO-SIMD128-NEXT: i32.store8 11($0), $2
; NO-SIMD128-NEXT: i32.store8 10($0), $2
; NO-SIMD128-NEXT: i32.store8 9($0), $2
; NO-SIMD128-NEXT: i32.store8 8($0), $2
; NO-SIMD128-NEXT: i32.store8 7($0), $2
; NO-SIMD128-NEXT: i32.store8 6($0), $2
; NO-SIMD128-NEXT: i32.store8 5($0), $2
; NO-SIMD128-NEXT: i32.store8 4($0), $2
; NO-SIMD128-NEXT: i32.store8 3($0), $2
; NO-SIMD128-NEXT: i32.store8 2($0), $2
; NO-SIMD128-NEXT: i32.store8 1($0), $2
; NO-SIMD128-NEXT: i32.store8 0($0), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <16 x i8> %x, <16 x i8> %y,
@@ -994,13 +979,6 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v8i16:
; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.store16 14($0), $2
; NO-SIMD128-NEXT: i32.store16 12($0), $2
; NO-SIMD128-NEXT: i32.store16 10($0), $2
; NO-SIMD128-NEXT: i32.store16 8($0), $2
; NO-SIMD128-NEXT: i32.store16 6($0), $2
; NO-SIMD128-NEXT: i32.store16 4($0), $2
; NO-SIMD128-NEXT: i32.store16 2($0), $2
; NO-SIMD128-NEXT: i32.store16 0($0), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <8 x i16> %x, <8 x i16> %y,
@@ -1288,9 +1266,6 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v4i32:
; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i32.store 12($0), $2
; NO-SIMD128-NEXT: i32.store 8($0), $2
; NO-SIMD128-NEXT: i32.store 4($0), $2
; NO-SIMD128-NEXT: i32.store 0($0), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <4 x i32> %x, <4 x i32> %y,
@@ -1550,7 +1525,6 @@ define <2 x i64> @shuffle_undef_v2i64(<2 x i64> %x, <2 x i64> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v2i64:
; NO-SIMD128: .functype shuffle_undef_v2i64 (i32, i64, i64, i64, i64) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: i64.store 8($0), $2
; NO-SIMD128-NEXT: i64.store 0($0), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <2 x i64> %x, <2 x i64> %y,
@@ -1819,9 +1793,6 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v4f32:
; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f32.store 12($0), $2
; NO-SIMD128-NEXT: f32.store 8($0), $2
; NO-SIMD128-NEXT: f32.store 4($0), $2
; NO-SIMD128-NEXT: f32.store 0($0), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <4 x float> %x, <4 x float> %y,
@@ -2082,7 +2053,6 @@ define <2 x double> @shuffle_undef_v2f64(<2 x double> %x, <2 x double> %y) {
; NO-SIMD128-LABEL: shuffle_undef_v2f64:
; NO-SIMD128: .functype shuffle_undef_v2f64 (i32, f64, f64, f64, f64) -> ()
; NO-SIMD128-NEXT: # %bb.0:
; NO-SIMD128-NEXT: f64.store 8($0), $2
; NO-SIMD128-NEXT: f64.store 0($0), $2
; NO-SIMD128-NEXT: return
%res = shufflevector <2 x double> %x, <2 x double> %y,
6 changes: 4 additions & 2 deletions llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -141,8 +141,10 @@ define <3 x i1> @test_urem_vec(<3 x i11> %X) nounwind {
; SSE2-NEXT: pmuludq %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
; SSE2-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; SSE2-NEXT: movl $1463, %eax # imm = 0x5B7
; SSE2-NEXT: movd %eax, %xmm3
; SSE2-NEXT: pmuludq %xmm1, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2047,2047,2047,2047]
; SSE2-NEXT: movdqa %xmm0, %xmm3
24 changes: 9 additions & 15 deletions llvm/test/CodeGen/X86/vec_smulo.ll
@@ -474,8 +474,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
; SSE2-NEXT: pcmpgtd %xmm3, %xmm6
; SSE2-NEXT: pand %xmm7, %xmm6
; SSE2-NEXT: paddd %xmm8, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE2-NEXT: pmuludq %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -548,8 +546,6 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm6
; SSSE3-NEXT: pand %xmm7, %xmm6
; SSSE3-NEXT: paddd %xmm8, %xmm6
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSSE3-NEXT: pmuludq %xmm2, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,3,2,3]
@@ -578,25 +574,23 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: pmuldq %xmm2, %xmm0
; SSE41-NEXT: pinsrd $3, %r8d, %xmm2
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %edx
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; SSE41-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE41-NEXT: movd %r9d, %xmm4
; SSE41-NEXT: movdqa %xmm4, %xmm5
; SSE41-NEXT: pmuldq %xmm3, %xmm4
; SSE41-NEXT: pinsrd $1, %edx, %xmm3
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %esi
; SSE41-NEXT: pinsrd $1, %esi, %xmm5
; SSE41-NEXT: pinsrd $1, %ecx, %xmm3
; SSE41-NEXT: movl {{[0-9]+}}(%rsp), %edx
; SSE41-NEXT: pinsrd $1, %edx, %xmm5
; SSE41-NEXT: pmulld %xmm3, %xmm5
; SSE41-NEXT: pinsrd $3, {{[0-9]+}}(%rsp), %xmm1
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rcx
; SSE41-NEXT: movd %edx, %xmm3
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,0,0,0]
; SSE41-NEXT: movd %esi, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,0,0,0]
; SSE41-NEXT: movq {{[0-9]+}}(%rsp), %rsi
; SSE41-NEXT: movd %ecx, %xmm3
; SSE41-NEXT: movd %edx, %xmm6
; SSE41-NEXT: pmuldq %xmm3, %xmm6
; SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5],xmm6[6,7]
; SSE41-NEXT: movq %xmm5, 16(%rcx)
; SSE41-NEXT: movq %xmm5, 16(%rsi)
; SSE41-NEXT: psrad $31, %xmm5
; SSE41-NEXT: pcmpeqd %xmm3, %xmm5
; SSE41-NEXT: pcmpeqd %xmm3, %xmm3
@@ -607,7 +601,7 @@ define <6 x i32> @smulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm6[2,3],xmm0[4,5],xmm6[6,7]
; SSE41-NEXT: pmulld %xmm2, %xmm1
; SSE41-NEXT: movdqa %xmm1, (%rcx)
; SSE41-NEXT: movdqa %xmm1, (%rsi)
; SSE41-NEXT: psrad $31, %xmm1
; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
; SSE41-NEXT: pxor %xmm3, %xmm1
10 changes: 4 additions & 6 deletions llvm/test/CodeGen/X86/vec_umulo.ll
@@ -394,8 +394,8 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = mem[0,0,0,0]
; SSE2-NEXT: pshufd {{.*#+}} xmm6 = mem[0,0,0,0]
; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE2-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
; SSE2-NEXT: pmuludq %xmm2, %xmm6
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
; SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,3,2,3]
@@ -444,8 +444,8 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = mem[0,0,0,0]
; SSSE3-NEXT: pshufd {{.*#+}} xmm6 = mem[0,0,0,0]
; SSSE3-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSSE3-NEXT: movd {{.*#+}} xmm6 = mem[0],zero,zero,zero
; SSSE3-NEXT: pmuludq %xmm2, %xmm6
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
; SSSE3-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,3,2,3]
@@ -492,9 +492,7 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
; SSE41-NEXT: pcmpeqd %xmm6, %xmm6
; SSE41-NEXT: pxor %xmm6, %xmm3
; SSE41-NEXT: movd %edi, %xmm7
; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm7[0,0,0,0]
; SSE41-NEXT: movd %r9d, %xmm8
; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm8[0,0,0,0]
; SSE41-NEXT: pmuludq %xmm7, %xmm8
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm8[2,3],xmm1[4,5],xmm8[6,7]
5 changes: 2 additions & 3 deletions llvm/test/CodeGen/X86/widen_shuffle-1.ll
@@ -105,14 +105,13 @@ define void @shuf5(ptr %p) nounwind {
; X86-LABEL: shuf5:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movsd {{.*#+}} xmm0 = [33,33,33,33,33,33,33,33,0,0,0,0,0,0,0,0]
; X86-NEXT: movsd {{.*#+}} xmm0 = [33,33,u,u,u,u,u,u,0,0,u,u,u,u,u,u]
; X86-NEXT: movsd %xmm0, (%eax)
; X86-NEXT: retl
;
; X64-LABEL: shuf5:
; X64: # %bb.0:
; X64-NEXT: movabsq $2387225703656530209, %rax # imm = 0x2121212121212121
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: movq $8481, (%rdi) # imm = 0x2121
; X64-NEXT: retq
%v = shufflevector <2 x i8> <i8 4, i8 33>, <2 x i8> poison, <8 x i32> <i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
store <8 x i8> %v, ptr %p, align 8