Skip to content

Commit 66e7dce

Browse files
committed
Revert "[X86][SSE] Shuffle combine blends to OR(X,Y) if the relevant elements are known zero."
This reverts commit 219f32f. Commit contains unsigned comparisons that break bots that build with -Wsign-compare.
1 parent 21de4e7 commit 66e7dce

File tree

5 files changed

+35
-61
lines changed

5 files changed

+35
-61
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 17 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -7401,8 +7401,8 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
74017401

74027402
// Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
74037403
// is a valid shuffle index.
7404-
SDValue N0 = peekThroughBitcasts(N.getOperand(0));
7405-
SDValue N1 = peekThroughBitcasts(N.getOperand(1));
7404+
SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
7405+
SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
74067406
if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
74077407
return false;
74087408
SmallVector<int, 64> SrcMask0, SrcMask1;
@@ -7413,24 +7413,34 @@ static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
74137413
true))
74147414
return false;
74157415

7416+
// Shuffle inputs must be the same size as the result.
7417+
if (llvm::any_of(SrcInputs0, [VT](SDValue Op) {
7418+
return VT.getSizeInBits() != Op.getValueSizeInBits();
7419+
}))
7420+
return false;
7421+
if (llvm::any_of(SrcInputs1, [VT](SDValue Op) {
7422+
return VT.getSizeInBits() != Op.getValueSizeInBits();
7423+
}))
7424+
return false;
7425+
74167426
size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
74177427
SmallVector<int, 64> Mask0, Mask1;
74187428
narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
74197429
narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
7420-
for (int i = 0; i != (int)MaskSize; ++i) {
7430+
for (size_t i = 0; i != MaskSize; ++i) {
74217431
if (Mask0[i] == SM_SentinelUndef && Mask1[i] == SM_SentinelUndef)
74227432
Mask.push_back(SM_SentinelUndef);
74237433
else if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
74247434
Mask.push_back(SM_SentinelZero);
74257435
else if (Mask1[i] == SM_SentinelZero)
7426-
Mask.push_back(i);
7436+
Mask.push_back(Mask0[i]);
74277437
else if (Mask0[i] == SM_SentinelZero)
7428-
Mask.push_back(i + MaskSize);
7438+
Mask.push_back(Mask1[i] + (int)(MaskSize * SrcInputs0.size()));
74297439
else
74307440
return false;
74317441
}
7432-
Ops.push_back(N0);
7433-
Ops.push_back(N1);
7442+
Ops.append(SrcInputs0.begin(), SrcInputs0.end());
7443+
Ops.append(SrcInputs1.begin(), SrcInputs1.end());
74347444
return true;
74357445
}
74367446
case ISD::INSERT_SUBVECTOR: {
@@ -34209,7 +34219,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
3420934219
SelectionDAG &DAG, const X86Subtarget &Subtarget,
3421034220
unsigned &Shuffle, MVT &SrcVT, MVT &DstVT,
3421134221
bool IsUnary) {
34212-
unsigned NumMaskElts = Mask.size();
3421334222
unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
3421434223

3421534224
if (MaskVT.is128BitVector()) {
@@ -34267,46 +34276,6 @@ static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
3426734276
}
3426834277
}
3426934278

34270-
// Attempt to match against a OR if we're performing a blend shuffle and the
34271-
// non-blended source element is zero in each case.
34272-
if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 &&
34273-
(EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) {
34274-
bool IsBlend = true;
34275-
unsigned NumV1Elts = V1.getValueType().getVectorNumElements();
34276-
unsigned NumV2Elts = V2.getValueType().getVectorNumElements();
34277-
unsigned Scale1 = NumV1Elts / NumMaskElts;
34278-
unsigned Scale2 = NumV2Elts / NumMaskElts;
34279-
APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts);
34280-
APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts);
34281-
for (unsigned i = 0; i != NumMaskElts; ++i) {
34282-
int M = Mask[i];
34283-
if (M == SM_SentinelUndef)
34284-
continue;
34285-
if (M == SM_SentinelZero) {
34286-
DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
34287-
DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
34288-
continue;
34289-
}
34290-
if (M == i) {
34291-
DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2);
34292-
continue;
34293-
}
34294-
if (M == (i + NumMaskElts)) {
34295-
DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1);
34296-
continue;
34297-
}
34298-
IsBlend = false;
34299-
break;
34300-
}
34301-
if (IsBlend &&
34302-
DAG.computeKnownBits(V1, DemandedZeroV1).isZero() &&
34303-
DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) {
34304-
Shuffle = ISD::OR;
34305-
SrcVT = DstVT = EVT(MaskVT).changeTypeToInteger().getSimpleVT();
34306-
return true;
34307-
}
34308-
}
34309-
3431034279
return false;
3431134280
}
3431234281

llvm/test/CodeGen/X86/insertelement-ones.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
389389
; SSE2-NEXT: movdqa %xmm3, %xmm4
390390
; SSE2-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
391391
; SSE2-NEXT: por %xmm4, %xmm0
392-
; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
392+
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
393+
; SSE2-NEXT: pand %xmm5, %xmm1
393394
; SSE2-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
394-
; SSE2-NEXT: por %xmm3, %xmm1
395+
; SSE2-NEXT: pandn %xmm3, %xmm5
396+
; SSE2-NEXT: por %xmm5, %xmm1
395397
; SSE2-NEXT: pand %xmm2, %xmm1
396398
; SSE2-NEXT: por %xmm4, %xmm1
397399
; SSE2-NEXT: retq
@@ -409,9 +411,11 @@ define <32 x i8> @insert_v32i8_x123456789ABCDEzGHIJKLMNOPQRSTxx(<32 x i8> %a) {
409411
; SSE3-NEXT: movdqa %xmm3, %xmm4
410412
; SSE3-NEXT: pslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0]
411413
; SSE3-NEXT: por %xmm4, %xmm0
412-
; SSE3-NEXT: pand {{.*}}(%rip), %xmm1
414+
; SSE3-NEXT: movdqa {{.*#+}} xmm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
415+
; SSE3-NEXT: pand %xmm5, %xmm1
413416
; SSE3-NEXT: pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1]
414-
; SSE3-NEXT: por %xmm3, %xmm1
417+
; SSE3-NEXT: pandn %xmm3, %xmm5
418+
; SSE3-NEXT: por %xmm5, %xmm1
415419
; SSE3-NEXT: pand %xmm2, %xmm1
416420
; SSE3-NEXT: por %xmm4, %xmm1
417421
; SSE3-NEXT: retq

llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1314,10 +1314,10 @@ define void @trunc_v4i64_to_v4i8(<32 x i8>* %L, <4 x i8>* %S) nounwind {
13141314
define <16 x i8> @negative(<32 x i8> %v, <32 x i8> %w) nounwind {
13151315
; AVX1-LABEL: negative:
13161316
; AVX1: # %bb.0:
1317-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1318-
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,u,u,u,u,0,2,4,6,8,10,12,14]
1319-
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u]
1320-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5,6,7]
1317+
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm0[u,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
1318+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
1319+
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u],zero,zero,zero,zero,zero,zero,zero,xmm0[0,2,4,6,8,10,12,14]
1320+
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
13211321
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
13221322
; AVX1-NEXT: vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
13231323
; AVX1-NEXT: vzeroupper

llvm/test/CodeGen/X86/vector-shuffle-128-v8.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1713,8 +1713,9 @@ define <8 x i16> @shuffle_v8i16_XX4X8acX(<8 x i16> %a, <8 x i16> %b) {
17131713
;
17141714
; SSSE3-LABEL: shuffle_v8i16_XX4X8acX:
17151715
; SSSE3: # %bb.0:
1716-
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,u,u,u,u,0,1,4,5,8,9,u,u]
1717-
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
1716+
; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[u,u,u,u,8,9,u,u],zero,zero,zero,zero,zero,zero,xmm0[u,u]
1717+
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,u],zero,zero,xmm1[u,u,0,1,4,5,8,9,u,u]
1718+
; SSSE3-NEXT: por %xmm1, %xmm0
17181719
; SSSE3-NEXT: retq
17191720
;
17201721
; SSE41-LABEL: shuffle_v8i16_XX4X8acX:

llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3358,9 +3358,9 @@ define <32 x i8> @shuffle_v32i8_42_45_12_13_35_35_60_40_17_22_29_44_33_12_48_51_
33583358
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,xmm2[u,u],zero,zero,xmm2[12],zero,xmm2[u,u,u],zero,zero,xmm2[u,0,3]
33593359
; AVX1-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[10,13,u,u,3,3],zero,xmm1[8,u,u,u,12,1,u],zero,zero
33603360
; AVX1-NEXT: vpor %xmm2, %xmm1, %xmm1
3361-
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u,u,u,u,u,u,u,1,6,13,u,u,u,u,u]
3362-
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u,u,u,u,u,u,12,u,u]
3363-
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
3361+
; AVX1-NEXT: vpshufb {{.*#+}} xmm2 = xmm4[u,u],zero,zero,xmm4[u,u,u,u,1,6,13,u,u],zero,xmm4[u,u]
3362+
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u,12,13,u,u,u,u],zero,zero,zero,xmm0[u,u,12,u,u]
3363+
; AVX1-NEXT: vpor %xmm2, %xmm0, %xmm0
33643364
; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,255,0,0,255,255,255,255,0,0,0,255,255,0,255,255]
33653365
; AVX1-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
33663366
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0

0 commit comments

Comments
 (0)