22
22
#include " llvm/ADT/SmallVector.h"
23
23
#include " llvm/Analysis/LoopInfo.h"
24
24
#include " llvm/Analysis/OptimizationRemarkEmitter.h"
25
+ #include " llvm/Analysis/TargetLibraryInfo.h"
25
26
#include " llvm/Analysis/TargetTransformInfo.h"
26
27
#include " llvm/Analysis/TargetTransformInfoImpl.h"
27
28
#include " llvm/Analysis/ValueTracking.h"
@@ -285,6 +286,64 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
285
286
return false ;
286
287
}
287
288
289
+ // / Several intrinsics that return structs (including llvm.sincos[pi] and
290
+ // / llvm.modf) can be lowered to a vector library call (for certain VFs). The
291
+ // / vector library functions correspond to the scalar calls (e.g. sincos or
292
+ // / modf), which unlike the intrinsic return values via output pointers. This
293
+ // / helper checks if a vector call exists for the given intrinsic, and returns
294
+ // / the cost, which includes the cost of the mask (if required), and the loads
295
+ // / for values returned via output pointers. \p LC is the scalar libcall and
296
+ // / \p CallRetElementIndex (optional) is the struct element which is mapped to
297
+ // / the call return value. If std::nullopt is returned, then no vector library
298
+ // / call is available, so the intrinsic should be assigned the default cost
299
+ // / (e.g. scalarization).
300
+ std::optional<InstructionCost> getMultipleResultIntrinsicVectorLibCallCost (
301
+ const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind,
302
+ RTLIB::Libcall LC, std::optional<unsigned > CallRetElementIndex = {}) {
303
+ Type *RetTy = ICA.getReturnType ();
304
+ // Vector variants of the intrinsic can be mapped to a vector library call.
305
+ auto const *LibInfo = ICA.getLibInfo ();
306
+ if (!LibInfo || !isa<StructType>(RetTy) ||
307
+ !isVectorizedStructTy (cast<StructType>(RetTy)))
308
+ return std::nullopt;
309
+
310
+ // Find associated libcall.
311
+ const char *LCName = getTLI ()->getLibcallName (LC);
312
+ if (!LCName)
313
+ return std::nullopt;
314
+
315
+ // Search for a corresponding vector variant.
316
+ LLVMContext &Ctx = RetTy->getContext ();
317
+ ElementCount VF = getVectorizedTypeVF (RetTy);
318
+ VecDesc const *VD = nullptr ;
319
+ for (bool Masked : {false , true }) {
320
+ if ((VD = LibInfo->getVectorMappingInfo (LCName, VF, Masked)))
321
+ break ;
322
+ }
323
+ if (!VD)
324
+ return std::nullopt;
325
+
326
+ // Cost the call + mask.
327
+ auto Cost =
328
+ thisT ()->getCallInstrCost (nullptr , RetTy, ICA.getArgTypes (), CostKind);
329
+ if (VD->isMasked ())
330
+ Cost += thisT ()->getShuffleCost (
331
+ TargetTransformInfo::SK_Broadcast,
332
+ VectorType::get (IntegerType::getInt1Ty (Ctx), VF), {}, CostKind, 0 ,
333
+ nullptr , {});
334
+
335
+ // Lowering to a library call (with output pointers) may require us to emit
336
+ // reloads for the results.
337
+ for (auto [Idx, VectorTy] : enumerate(getContainedTypes (RetTy))) {
338
+ if (Idx == CallRetElementIndex)
339
+ continue ;
340
+ Cost += thisT ()->getMemoryOpCost (
341
+ Instruction::Load, VectorTy,
342
+ thisT ()->getDataLayout ().getABITypeAlign (VectorTy), 0 , CostKind);
343
+ }
344
+ return Cost;
345
+ }
346
+
288
347
protected:
289
348
explicit BasicTTIImplBase (const TargetMachine *TM, const DataLayout &DL)
290
349
: BaseT(DL) {}
@@ -1716,9 +1775,9 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1716
1775
1717
1776
Type *RetTy = ICA.getReturnType ();
1718
1777
1719
- ElementCount RetVF =
1720
- (RetTy-> isVectorTy () ? cast<VectorType>(RetTy)-> getElementCount ()
1721
- : ElementCount::getFixed ( 1 ));
1778
+ ElementCount RetVF = isVectorizedTy (RetTy) ? getVectorizedTypeVF (RetTy)
1779
+ : ElementCount::getFixed ( 1 );
1780
+
1722
1781
const IntrinsicInst *I = ICA.getInst ();
1723
1782
const SmallVectorImpl<const Value *> &Args = ICA.getArgs ();
1724
1783
FastMathFlags FMF = ICA.getFlags ();
@@ -1971,6 +2030,16 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1971
2030
}
1972
2031
case Intrinsic::experimental_vector_match:
1973
2032
return thisT ()->getTypeBasedIntrinsicInstrCost (ICA, CostKind);
2033
+ case Intrinsic::sincos: {
2034
+ Type *Ty = getContainedTypes (RetTy).front ();
2035
+ EVT VT = getTLI ()->getValueType (DL, Ty);
2036
+ RTLIB::Libcall LC = RTLIB::getFSINCOS (VT.getScalarType ());
2037
+ if (auto Cost =
2038
+ getMultipleResultIntrinsicVectorLibCallCost (ICA, CostKind, LC))
2039
+ return *Cost;
2040
+ // Otherwise, fall back to the default scalarization cost.
2041
+ break ;
2042
+ }
1974
2043
}
1975
2044
1976
2045
// Assume that we need to scalarize this intrinsic.
@@ -1979,10 +2048,13 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
1979
2048
InstructionCost ScalarizationCost = InstructionCost::getInvalid ();
1980
2049
if (RetVF.isVector () && !RetVF.isScalable ()) {
1981
2050
ScalarizationCost = 0 ;
1982
- if (!RetTy->isVoidTy ())
1983
- ScalarizationCost += getScalarizationOverhead (
1984
- cast<VectorType>(RetTy),
1985
- /* Insert*/ true , /* Extract*/ false , CostKind);
2051
+ if (!RetTy->isVoidTy ()) {
2052
+ for (Type *VectorTy : getContainedTypes (RetTy)) {
2053
+ ScalarizationCost += getScalarizationOverhead (
2054
+ cast<VectorType>(VectorTy),
2055
+ /* Insert=*/ true , /* Extract=*/ false , CostKind);
2056
+ }
2057
+ }
1986
2058
ScalarizationCost +=
1987
2059
getOperandsScalarizationOverhead (Args, ICA.getArgTypes (), CostKind);
1988
2060
}
@@ -2637,27 +2709,32 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
2637
2709
// Else, assume that we need to scalarize this intrinsic. For math builtins
2638
2710
// this will emit a costly libcall, adding call overhead and spills. Make it
2639
2711
// very expensive.
2640
- if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2712
+ if (isVectorizedTy (RetTy)) {
2713
+ ArrayRef<Type *> RetVTys = getContainedTypes (RetTy);
2714
+
2641
2715
// Scalable vectors cannot be scalarized, so return Invalid.
2642
- if (isa<ScalableVectorType>(RetTy) || any_of (Tys, [](const Type *Ty) {
2643
- return isa<ScalableVectorType>(Ty);
2644
- }))
2716
+ if (any_of (concat<Type *const >(RetVTys, Tys),
2717
+ [](Type *Ty) { return isa<ScalableVectorType>(Ty); }))
2645
2718
return InstructionCost::getInvalid ();
2646
2719
2647
- InstructionCost ScalarizationCost =
2648
- SkipScalarizationCost
2649
- ? ScalarizationCostPassed
2650
- : getScalarizationOverhead (RetVTy, /* Insert*/ true ,
2651
- /* Extract*/ false , CostKind);
2720
+ InstructionCost ScalarizationCost = ScalarizationCostPassed;
2721
+ if (!SkipScalarizationCost) {
2722
+ ScalarizationCost = 0 ;
2723
+ for (Type *RetVTy : RetVTys) {
2724
+ ScalarizationCost += getScalarizationOverhead (
2725
+ cast<VectorType>(RetVTy), /* Insert=*/ true ,
2726
+ /* Extract=*/ false , CostKind);
2727
+ }
2728
+ }
2652
2729
2653
- unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)-> getNumElements ();
2730
+ unsigned ScalarCalls = getVectorizedTypeVF (RetTy). getFixedValue ();
2654
2731
SmallVector<Type *, 4 > ScalarTys;
2655
2732
for (Type *Ty : Tys) {
2656
2733
if (Ty->isVectorTy ())
2657
2734
Ty = Ty->getScalarType ();
2658
2735
ScalarTys.push_back (Ty);
2659
2736
}
2660
- IntrinsicCostAttributes Attrs (IID, RetTy-> getScalarType ( ), ScalarTys, FMF);
2737
+ IntrinsicCostAttributes Attrs (IID, toScalarizedTy (RetTy ), ScalarTys, FMF);
2661
2738
InstructionCost ScalarCost =
2662
2739
thisT ()->getIntrinsicInstrCost (Attrs, CostKind);
2663
2740
for (Type *Ty : Tys) {
0 commit comments