Skip to content

Commit cd1fe65

Browse files
authored
Merge pull request #547 from Xilinx/bump_to_57466db7
[AutoBump] Merge with 57466db (Jan 20) (9)
2 parents 540639b + e6c513d commit cd1fe65

27 files changed

+348
-6614
lines changed

compiler-rt/lib/interception/interception_win.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -651,6 +651,10 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
651651
case 0xD284: // 84 D2 : test dl,dl
652652
return 2;
653653

654+
case 0x3980: // 80 39 XX : cmp BYTE PTR [rcx], XX
655+
case 0x4D8B: // 8B 4D XX : mov XX(%ebp), ecx
656+
case 0x558B: // 8B 55 XX : mov XX(%ebp), edx
657+
case 0x758B: // 8B 75 XX : mov XX(%ebp), esi
654658
case 0xE483: // 83 E4 XX : and esp, XX
655659
case 0xEC83: // 83 EC XX : sub esp, XX
656660
case 0xC1F6: // F6 C1 XX : test cl, XX
@@ -757,6 +761,9 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
757761
case 0xc1ff48: // 48 ff c1 : inc rcx
758762
case 0xc1ff49: // 49 ff c1 : inc r9
759763
case 0xc28b41: // 41 8b c2 : mov eax, r10d
764+
case 0x01b60f: // 0f b6 01 : movzx eax, BYTE PTR [rcx]
765+
case 0x09b60f: // 0f b6 09 : movzx ecx, BYTE PTR [rcx]
766+
case 0x11b60f: // 0f b6 11 : movzx edx, BYTE PTR [rcx]
760767
case 0xc2b60f: // 0f b6 c2 : movzx eax, dl
761768
case 0xc2ff48: // 48 ff c2 : inc rdx
762769
case 0xc2ff49: // 49 ff c2 : inc r10
@@ -775,6 +782,7 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
775782
case 0xc98548: // 48 85 c9 : test rcx, rcx
776783
case 0xc9854d: // 4d 85 c9 : test r9, r9
777784
case 0xc98b4c: // 4c 8b c9 : mov r9, rcx
785+
case 0xd12948: // 48 29 d1 : sub rcx, rdx
778786
case 0xca2b48: // 48 2b ca : sub rcx, rdx
779787
case 0xca3b48: // 48 3b ca : cmp rcx, rdx
780788
case 0xd12b48: // 48 2b d1 : sub rdx, rcx
@@ -784,16 +792,33 @@ static size_t GetInstructionSize(uptr address, size_t* rel_offset = nullptr) {
784792
case 0xd2854d: // 4d 85 d2 : test r10, r10
785793
case 0xd28b4c: // 4c 8b d2 : mov r10, rdx
786794
case 0xd2b60f: // 0f b6 d2 : movzx edx, dl
795+
case 0xd2be0f: // 0f be d2 : movsx edx, dl
787796
case 0xd98b4c: // 4c 8b d9 : mov r11, rcx
788797
case 0xd9f748: // 48 f7 d9 : neg rcx
798+
case 0xc03145: // 45 31 c0 : xor r8d,r8d
799+
case 0xc93145: // 45 31 c9 : xor r9d,r9d
789800
case 0xdb3345: // 45 33 db : xor r11d, r11d
801+
case 0xc08445: // 45 84 c0 : test r8b,r8b
802+
case 0xd28445: // 45 84 d2 : test r10b,r10b
790803
case 0xdb8548: // 48 85 db : test rbx, rbx
791804
case 0xdb854d: // 4d 85 db : test r11, r11
792805
case 0xdc8b4c: // 4c 8b dc : mov r11, rsp
793806
case 0xe48548: // 48 85 e4 : test rsp, rsp
794807
case 0xe4854d: // 4d 85 e4 : test r12, r12
808+
case 0xc88948: // 48 89 c8 : mov rax,rcx
809+
case 0xcb8948: // 48 89 cb : mov rbx,rcx
810+
case 0xd08948: // 48 89 d0 : mov rax,rdx
811+
case 0xd18948: // 48 89 d1 : mov rcx,rdx
812+
case 0xd38948: // 48 89 d3 : mov rbx,rdx
795813
case 0xe58948: // 48 89 e5 : mov rbp, rsp
796814
case 0xed8548: // 48 85 ed : test rbp, rbp
815+
case 0xc88949: // 49 89 c8 : mov r8, rcx
816+
case 0xc98949: // 49 89 c9 : mov r9, rcx
817+
case 0xca8949: // 49 89 ca : mov r10,rcx
818+
case 0xd08949: // 49 89 d0 : mov r8, rdx
819+
case 0xd18949: // 49 89 d1 : mov r9, rdx
820+
case 0xd28949: // 49 89 d2 : mov r10, rdx
821+
case 0xd38949: // 49 89 d3 : mov r11, rdx
797822
case 0xed854d: // 4d 85 ed : test r13, r13
798823
case 0xf6854d: // 4d 85 f6 : test r14, r14
799824
case 0xff854d: // 4d 85 ff : test r15, r15

compiler-rt/lib/interception/tests/interception_win_test.cpp

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,8 +857,12 @@ const struct InstructionSizeData {
857857
{ 2, {0x8B, 0xC1}, 0, "8B C1 : mov eax, ecx"},
858858
{ 2, {0x8B, 0xEC}, 0, "8B EC : mov ebp, esp"},
859859
{ 2, {0x8B, 0xFF}, 0, "8B FF : mov edi, edi"},
860+
{ 3, {0x80, 0x39, 0x72}, 0, "80 39 XX : cmp BYTE PTR [rcx], XX"},
860861
{ 3, {0x83, 0xE4, 0x72}, 0, "83 E4 XX : and esp, XX"},
861862
{ 3, {0x83, 0xEC, 0x72}, 0, "83 EC XX : sub esp, XX"},
863+
{ 3, {0x8B, 0x4D, 0x72}, 0, "8B 4D XX : mov XX(%ebp), ecx"},
864+
{ 3, {0x8B, 0x55, 0x72}, 0, "8B 55 XX : mov XX(%ebp), edx"},
865+
{ 3, {0x8B, 0x75, 0x72}, 0, "8B 75 XX : mov XX(%ebp), esp"},
862866
{ 3, {0xc2, 0x71, 0x72}, 0, "C2 XX XX : ret XX (needed for registering weak functions)"},
863867
{ 5, {0x68, 0x71, 0x72, 0x73, 0x74}, 0, "68 XX XX XX XX : push imm32"},
864868
{ 5, {0xb8, 0x71, 0x72, 0x73, 0x74}, 0, "b8 XX XX XX XX : mov eax, XX XX XX XX"},
@@ -881,17 +885,26 @@ const struct InstructionSizeData {
881885
{ 2, {0x66, 0x90}, 0, "66 90 : Two-byte NOP"},
882886
{ 2, {0x84, 0xc0}, 0, "84 c0 : test al, al"},
883887
{ 2, {0x8a, 0x01}, 0, "8a 01 : mov al, byte ptr [rcx]"},
888+
{ 3, {0x0f, 0xb6, 0x01}, 0, "0f b6 01 : movzx eax, BYTE PTR [rcx]"},
889+
{ 3, {0x0f, 0xb6, 0x09}, 0, "0f b6 09 : movzx ecx, BYTE PTR [rcx]"},
890+
{ 3, {0x0f, 0xb6, 0x11}, 0, "0f b6 11 : movzx edx, BYTE PTR [rcx]"},
884891
{ 3, {0x0f, 0xb6, 0xc2}, 0, "0f b6 c2 : movzx eax, dl"},
885892
{ 3, {0x0f, 0xb6, 0xd2}, 0, "0f b6 d2 : movzx edx, dl"},
886893
{ 3, {0x0f, 0xb7, 0x10}, 0, "0f b7 10 : movzx edx, WORD PTR [rax]"},
894+
{ 3, {0x0f, 0xbe, 0xd2}, 0, "0f be d2 : movsx edx, dl"},
887895
{ 3, {0x41, 0x8b, 0xc0}, 0, "41 8b c0 : mov eax, r8d"},
888896
{ 3, {0x41, 0x8b, 0xc1}, 0, "41 8b c1 : mov eax, r9d"},
889897
{ 3, {0x41, 0x8b, 0xc2}, 0, "41 8b c2 : mov eax, r10d"},
890898
{ 3, {0x41, 0x8b, 0xc3}, 0, "41 8b c3 : mov eax, r11d"},
891899
{ 3, {0x41, 0x8b, 0xc4}, 0, "41 8b c4 : mov eax, r12d"},
900+
{ 3, {0x45, 0x31, 0xc0}, 0, "45 31 c0 : xor r8d,r8d"},
901+
{ 3, {0x45, 0x31, 0xc9}, 0, "45 31 c9 : xor r9d,r9d"},
892902
{ 3, {0x45, 0x33, 0xc0}, 0, "45 33 c0 : xor r8d, r8d"},
893903
{ 3, {0x45, 0x33, 0xc9}, 0, "45 33 c9 : xor r9d, r9d"},
894904
{ 3, {0x45, 0x33, 0xdb}, 0, "45 33 db : xor r11d, r11d"},
905+
{ 3, {0x45, 0x84, 0xc0}, 0, "45 84 c0 : test r8b,r8b"},
906+
{ 3, {0x45, 0x84, 0xd2}, 0, "45 84 d2 : test r10b,r10b"},
907+
{ 3, {0x48, 0x29, 0xd1}, 0, "48 29 d1 : sub rcx, rdx"},
895908
{ 3, {0x48, 0x2b, 0xca}, 0, "48 2b ca : sub rcx, rdx"},
896909
{ 3, {0x48, 0x2b, 0xd1}, 0, "48 2b d1 : sub rdx, rcx"},
897910
{ 3, {0x48, 0x3b, 0xca}, 0, "48 3b ca : cmp rcx, rdx"},
@@ -901,6 +914,11 @@ const struct InstructionSizeData {
901914
{ 3, {0x48, 0x85, 0xdb}, 0, "48 85 db : test rbx, rbx"},
902915
{ 3, {0x48, 0x85, 0xe4}, 0, "48 85 e4 : test rsp, rsp"},
903916
{ 3, {0x48, 0x85, 0xed}, 0, "48 85 ed : test rbp, rbp"},
917+
{ 3, {0x48, 0x89, 0xc8}, 0, "48 89 c8 : mov rax,rcx"},
918+
{ 3, {0x48, 0x89, 0xcb}, 0, "48 89 cb : mov rbx,rcx"},
919+
{ 3, {0x48, 0x89, 0xd0}, 0, "48 89 d0 : mov rax,rdx"},
920+
{ 3, {0x48, 0x89, 0xd1}, 0, "48 89 d1 : mov rcx,rdx"},
921+
{ 3, {0x48, 0x89, 0xd3}, 0, "48 89 d3 : mov rbx,rdx"},
904922
{ 3, {0x48, 0x89, 0xe5}, 0, "48 89 e5 : mov rbp, rsp"},
905923
{ 3, {0x48, 0x8b, 0xc1}, 0, "48 8b c1 : mov rax, rcx"},
906924
{ 3, {0x48, 0x8b, 0xc4}, 0, "48 8b c4 : mov rax, rsp"},
@@ -912,6 +930,13 @@ const struct InstructionSizeData {
912930
{ 3, {0x48, 0xff, 0xc3}, 0, "48 ff c3 : inc rbx"},
913931
{ 3, {0x48, 0xff, 0xc6}, 0, "48 ff c6 : inc rsi"},
914932
{ 3, {0x48, 0xff, 0xc7}, 0, "48 ff c7 : inc rdi"},
933+
{ 3, {0x49, 0x89, 0xc8}, 0, "49 89 c8 : mov r8, rcx"},
934+
{ 3, {0x49, 0x89, 0xc9}, 0, "49 89 c9 : mov r9, rcx"},
935+
{ 3, {0x49, 0x89, 0xca}, 0, "49 89 ca : mov r10,rcx"},
936+
{ 3, {0x49, 0x89, 0xd0}, 0, "49 89 d0 : mov r8, rdx"},
937+
{ 3, {0x49, 0x89, 0xd1}, 0, "49 89 d1 : mov r9, rdx"},
938+
{ 3, {0x49, 0x89, 0xd2}, 0, "49 89 d2 : mov r10, rdx"},
939+
{ 3, {0x49, 0x89, 0xd3}, 0, "49 89 d3 : mov r11, rdx"},
915940
{ 3, {0x49, 0xff, 0xc0}, 0, "49 ff c0 : inc r8"},
916941
{ 3, {0x49, 0xff, 0xc1}, 0, "49 ff c1 : inc r9"},
917942
{ 3, {0x49, 0xff, 0xc2}, 0, "49 ff c2 : inc r10"},

llvm/include/llvm/CodeGen/ISDOpcodes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,6 +1480,10 @@ enum NodeType {
14801480
// Output: Output Chain
14811481
EXPERIMENTAL_VECTOR_HISTOGRAM,
14821482

1483+
// Finds the index of the last active mask element
1484+
// Operands: Mask
1485+
VECTOR_FIND_LAST_ACTIVE,
1486+
14831487
// llvm.clear_cache intrinsic
14841488
// Operands: Input Chain, Start Address, End Address
14851489
// Outputs: Output Chain

llvm/include/llvm/CodeGen/TargetLowering.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5368,6 +5368,11 @@ class TargetLowering : public TargetLoweringBase {
53685368
/// \returns The expansion result or SDValue() if it fails.
53695369
SDValue expandVPCTTZElements(SDNode *N, SelectionDAG &DAG) const;
53705370

5371+
/// Expand VECTOR_FIND_LAST_ACTIVE nodes
5372+
/// \param N Node to expand
5373+
/// \returns The expansion result or SDValue() if it fails.
5374+
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const;
5375+
53715376
/// Expand ABS nodes. Expands vector/scalar ABS nodes,
53725377
/// vector nodes can only succeed if all operations are legal/custom.
53735378
/// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
155155
case ISD::ZERO_EXTEND_VECTOR_INREG:
156156
Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
157157

158+
case ISD::VECTOR_FIND_LAST_ACTIVE:
159+
Res = PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(N);
160+
break;
161+
158162
case ISD::SIGN_EXTEND:
159163
case ISD::VP_SIGN_EXTEND:
160164
case ISD::ZERO_EXTEND:
@@ -2069,6 +2073,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
20692073
case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
20702074
Res = PromoteIntOp_VECTOR_HISTOGRAM(N, OpNo);
20712075
break;
2076+
case ISD::VECTOR_FIND_LAST_ACTIVE:
2077+
Res = PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(N, OpNo);
2078+
break;
20722079
}
20732080

20742081
// If the result is null, the sub-method took care of registering results etc.
@@ -2810,6 +2817,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N,
28102817
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
28112818
}
28122819

2820+
SDValue DAGTypeLegalizer::PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N,
2821+
unsigned OpNo) {
2822+
SmallVector<SDValue, 1> NewOps(N->ops());
2823+
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
2824+
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
2825+
}
2826+
28132827
//===----------------------------------------------------------------------===//
28142828
// Integer Result Expansion
28152829
//===----------------------------------------------------------------------===//
@@ -6120,6 +6134,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
61206134
return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
61216135
}
61226136

6137+
SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N) {
6138+
EVT VT = N->getValueType(0);
6139+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6140+
return DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, SDLoc(N), NVT, N->ops());
6141+
}
6142+
61236143
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
61246144
EVT OutVT = N->getValueType(0);
61256145
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
378378
SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
379379
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
380380
SDValue PromoteIntRes_PATCHPOINT(SDNode *N);
381+
SDValue PromoteIntRes_VECTOR_FIND_LAST_ACTIVE(SDNode *N);
381382

382383
// Integer Operand Promotion.
383384
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -428,6 +429,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
428429
SDValue PromoteIntOp_VP_STRIDED(SDNode *N, unsigned OpNo);
429430
SDValue PromoteIntOp_VP_SPLICE(SDNode *N, unsigned OpNo);
430431
SDValue PromoteIntOp_VECTOR_HISTOGRAM(SDNode *N, unsigned OpNo);
432+
SDValue PromoteIntOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N, unsigned OpNo);
431433

432434
void SExtOrZExtPromotedOperands(SDValue &LHS, SDValue &RHS);
433435
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
503503
case ISD::VECREDUCE_FMIN:
504504
case ISD::VECREDUCE_FMAXIMUM:
505505
case ISD::VECREDUCE_FMINIMUM:
506+
case ISD::VECTOR_FIND_LAST_ACTIVE:
506507
Action = TLI.getOperationAction(Node->getOpcode(),
507508
Node->getOperand(0).getValueType());
508509
break;
@@ -1225,6 +1226,9 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
12251226
case ISD::VECTOR_COMPRESS:
12261227
Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
12271228
return;
1229+
case ISD::VECTOR_FIND_LAST_ACTIVE:
1230+
Results.push_back(TLI.expandVectorFindLastActive(Node, DAG));
1231+
return;
12281232
case ISD::SCMP:
12291233
case ISD::UCMP:
12301234
Results.push_back(TLI.expandCMP(Node, DAG));

llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -6427,42 +6427,25 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
64276427
assert(Intrinsic == Intrinsic::experimental_vector_extract_last_active &&
64286428
"Tried lowering invalid vector extract last");
64296429
SDLoc sdl = getCurSDLoc();
6430+
const DataLayout &Layout = DAG.getDataLayout();
64306431
SDValue Data = getValue(I.getOperand(0));
64316432
SDValue Mask = getValue(I.getOperand(1));
6432-
SDValue PassThru = getValue(I.getOperand(2));
64336433

6434-
EVT DataVT = Data.getValueType();
6435-
EVT ScalarVT = PassThru.getValueType();
6436-
EVT BoolVT = Mask.getValueType().getScalarType();
6437-
6438-
// Find a suitable type for a stepvector.
6439-
ConstantRange VScaleRange(1, /*isFullSet=*/true); // Dummy value.
6440-
if (DataVT.isScalableVector())
6441-
VScaleRange = getVScaleRange(I.getCaller(), 64);
64426434
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6443-
unsigned EltWidth = TLI.getBitWidthForCttzElements(
6444-
I.getType(), DataVT.getVectorElementCount(), /*ZeroIsPoison=*/true,
6445-
&VScaleRange);
6446-
MVT StepVT = MVT::getIntegerVT(EltWidth);
6447-
EVT StepVecVT = DataVT.changeVectorElementType(StepVT);
6448-
6449-
// Zero out lanes with inactive elements, then find the highest remaining
6450-
// value from the stepvector.
6451-
SDValue Zeroes = DAG.getConstant(0, sdl, StepVecVT);
6452-
SDValue StepVec = DAG.getStepVector(sdl, StepVecVT);
6453-
SDValue ActiveElts = DAG.getSelect(sdl, StepVecVT, Mask, StepVec, Zeroes);
6454-
SDValue HighestIdx =
6455-
DAG.getNode(ISD::VECREDUCE_UMAX, sdl, StepVT, ActiveElts);
6456-
6457-
// Extract the corresponding lane from the data vector
6458-
EVT ExtVT = TLI.getVectorIdxTy(DAG.getDataLayout());
6459-
SDValue Idx = DAG.getZExtOrTrunc(HighestIdx, sdl, ExtVT);
6460-
SDValue Extract =
6461-
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, ScalarVT, Data, Idx);
6462-
6463-
// If all mask lanes were inactive, choose the passthru value instead.
6464-
SDValue AnyActive = DAG.getNode(ISD::VECREDUCE_OR, sdl, BoolVT, Mask);
6465-
SDValue Result = DAG.getSelect(sdl, ScalarVT, AnyActive, Extract, PassThru);
6435+
EVT ResVT = TLI.getValueType(Layout, I.getType());
6436+
6437+
EVT ExtVT = TLI.getVectorIdxTy(Layout);
6438+
SDValue Idx = DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, sdl, ExtVT, Mask);
6439+
SDValue Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, sdl, ResVT, Data, Idx);
6440+
6441+
Value *Default = I.getOperand(2);
6442+
if (!isa<PoisonValue>(Default) && !isa<UndefValue>(Default)) {
6443+
SDValue PassThru = getValue(Default);
6444+
EVT BoolVT = Mask.getValueType().getScalarType();
6445+
SDValue AnyActive = DAG.getNode(ISD::VECREDUCE_OR, sdl, BoolVT, Mask);
6446+
Result = DAG.getSelect(sdl, ResVT, AnyActive, Result, PassThru);
6447+
}
6448+
64666449
setValue(&I, Result);
64676450
}
64686451

llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
567567
case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM:
568568
return "histogram";
569569

570+
case ISD::VECTOR_FIND_LAST_ACTIVE:
571+
return "find_last_active";
572+
570573
// Vector Predication
571574
#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \
572575
case ISD::SDID: \

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include "llvm/CodeGen/TargetLowering.h"
1414
#include "llvm/ADT/STLExtras.h"
15+
#include "llvm/Analysis/ValueTracking.h"
1516
#include "llvm/Analysis/VectorUtils.h"
1617
#include "llvm/CodeGen/CallingConvLower.h"
1718
#include "llvm/CodeGen/CodeGenCommonISel.h"
@@ -9451,6 +9452,43 @@ SDValue TargetLowering::expandVPCTTZElements(SDNode *N,
94519452
return DAG.getNode(ISD::VP_REDUCE_UMIN, DL, ResVT, ExtEVL, Select, Mask, EVL);
94529453
}
94539454

9455+
SDValue TargetLowering::expandVectorFindLastActive(SDNode *N,
9456+
SelectionDAG &DAG) const {
9457+
SDLoc DL(N);
9458+
SDValue Mask = N->getOperand(0);
9459+
EVT MaskVT = Mask.getValueType();
9460+
EVT BoolVT = MaskVT.getScalarType();
9461+
9462+
// Find a suitable type for a stepvector.
9463+
ConstantRange VScaleRange(1, /*isFullSet=*/true); // Fixed length default.
9464+
if (MaskVT.isScalableVector())
9465+
VScaleRange = getVScaleRange(&DAG.getMachineFunction().getFunction(), 64);
9466+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9467+
unsigned EltWidth = TLI.getBitWidthForCttzElements(
9468+
BoolVT.getTypeForEVT(*DAG.getContext()), MaskVT.getVectorElementCount(),
9469+
/*ZeroIsPoison=*/true, &VScaleRange);
9470+
EVT StepVT = MVT::getIntegerVT(EltWidth);
9471+
EVT StepVecVT = MaskVT.changeVectorElementType(StepVT);
9472+
9473+
// If promotion is required to make the type legal, do it here; promotion
9474+
// of integers within LegalizeVectorOps is looking for types of the same
9475+
// size but with a smaller number of larger elements, not the usual larger
9476+
// size with the same number of larger elements.
9477+
if (TLI.getTypeAction(StepVecVT.getSimpleVT()) ==
9478+
TargetLowering::TypePromoteInteger) {
9479+
StepVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), StepVecVT);
9480+
StepVT = StepVecVT.getVectorElementType();
9481+
}
9482+
9483+
// Zero out lanes with inactive elements, then find the highest remaining
9484+
// value from the stepvector.
9485+
SDValue Zeroes = DAG.getConstant(0, DL, StepVecVT);
9486+
SDValue StepVec = DAG.getStepVector(DL, StepVecVT);
9487+
SDValue ActiveElts = DAG.getSelect(DL, StepVecVT, Mask, StepVec, Zeroes);
9488+
SDValue HighestIdx = DAG.getNode(ISD::VECREDUCE_UMAX, DL, StepVT, ActiveElts);
9489+
return DAG.getZExtOrTrunc(HighestIdx, DL, N->getValueType(0));
9490+
}
9491+
94549492
SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
94559493
bool IsNegative) const {
94569494
SDLoc dl(N);

0 commit comments

Comments
 (0)