#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
  Type *RetTy = nullptr;
  bool TypeBasedOnly = false);
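  /// Example (illustrative sketch, not part of the original header): building
  /// an IntrinsicCostAttributes and asking the target for the cost of an
  /// @llvm.fmuladd call. The <4 x float> width is an arbitrary choice; only
  /// the IntrinsicCostAttributes / getIntrinsicInstrCost API is assumed.
  /// \code
  ///   InstructionCost fmuladdCost(const TargetTransformInfo &TTI,
  ///                               LLVMContext &Ctx) {
  ///     auto *V4F32 = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  ///     IntrinsicCostAttributes ICA(Intrinsic::fmuladd, V4F32,
  ///                                 {V4F32, V4F32, V4F32});
  ///     return TTI.getIntrinsicInstrCost(
  ///         ICA, TargetTransformInfo::TCK_RecipThroughput);
  ///   }
  /// \endcode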
class TargetTransformInfo;

  static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");

  const PointersChainInfo &Info, Type *AccessTy,
  std::pair<const Value *, unsigned>
  KnownBits &Known, bool &KnownBitsComputed) const;
  SimplifyAndSetOp) const;
  bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0,
  int64_t BaseOffset, bool HasBaseReg, unsigned AddrSpace = 0) const;
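  /// Example (illustrative sketch): the BaseReg/Scale/AddrSpace parameters
  /// above feed the addressing-mode legality queries. A pass might ask whether
  /// a "[BaseReg + 4 * IndexReg]" i32 access is legal to form; the constants
  /// below are made up for illustration.
  /// \code
  ///   bool canFoldScaledIndex(const TargetTransformInfo &TTI,
  ///                           LLVMContext &Ctx) {
  ///     return TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
  ///                                      /*BaseGV=*/nullptr, /*BaseOffset=*/0,
  ///                                      /*HasBaseReg=*/true, /*Scale=*/4,
  ///                                      /*AddrSpace=*/0);
  ///   }
  /// \endcode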
  const APInt &DemandedElts, bool Insert, bool Extract,
  bool IsZeroCmp) const;
  unsigned *Fast = nullptr) const;
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
  Type *ScalarValTy) const;
  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
  unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const;
  unsigned Opcode, Type *Ty,
  ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
  const Instruction *CxtI = nullptr) const;
  ArrayRef<int> Mask = std::nullopt,
  ArrayRef<const Value *> Args = std::nullopt) const;
  unsigned Index) const;
  Value *Op1 = nullptr) const;
  unsigned Index = -1) const;
  const APInt &DemandedDstElts,
  const Instruction *I = nullptr) const;
  unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
  bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
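  /// Example (illustrative sketch): comparing the reciprocal-throughput cost
  /// of a vector add against a vector reverse shuffle. The <8 x i16> type is
  /// an arbitrary choice; the default cost-kind and operand-info arguments are
  /// assumed to be acceptable.
  /// \code
  ///   void dumpVectorCosts(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  ///     auto *VecTy = FixedVectorType::get(Type::getInt16Ty(Ctx), 8);
  ///     InstructionCost AddCost =
  ///         TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
  ///     InstructionCost RevCost =
  ///         TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VecTy);
  ///     errs() << "add: " << AddCost << ", reverse: " << RevCost << "\n";
  ///   }
  /// \endcode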
    return FMF && !(*FMF).allowReassoc();
  unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
  const SCEV *Ptr = nullptr) const;
  Type *ExpectedType) const;
  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
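  /// Example (illustrative sketch): a memcpy-expansion utility can ask the
  /// target which type the lowered copy loop should move per iteration. The
  /// alignments and address spaces below are placeholder values.
  /// \code
  ///   Type *memcpyLoopElemTy(const TargetTransformInfo &TTI, LLVMContext &Ctx,
  ///                          Value *Length) {
  ///     return TTI.getMemcpyLoopLoweringType(Ctx, Length, /*SrcAddrSpace=*/0,
  ///                                          /*DestAddrSpace=*/0,
  ///                                          /*SrcAlign=*/4, /*DestAlign=*/4);
  ///   }
  /// \endcode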
  unsigned AddrSpace) const;
  unsigned AddrSpace) const;
  unsigned ChainSizeInBytes,
  unsigned ChainSizeInBytes,
  Align Alignment) const;

  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
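  /// Example (illustrative sketch): a new-pass-manager function pass obtains
  /// this analysis result through the FunctionAnalysisManager. "MyPass" is a
  /// placeholder name; the query shown is one of the hooks declared above.
  /// \code
  ///   PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &AM) {
  ///     const TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
  ///     if (!TTI.enableInterleavedAccessVectorization())
  ///       return PreservedAnalyses::all();
  ///     // ... transform F, consulting further TTI cost queries ...
  ///     return PreservedAnalyses::none();
  ///   }
  /// \endcode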
  virtual std::pair<const Value *, unsigned>
  Value *NewV) const = 0;
  KnownBits &Known, bool &KnownBitsComputed) = 0;
  SimplifyAndSetOp) = 0;
  int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
  Align Alignment) = 0;
  Align Alignment) = 0;
  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0;
  const APInt &DemandedElts, bool Insert, bool Extract,
  unsigned *Fast) = 0;
  Type *Ty = nullptr) const = 0;
  bool IsScalable) const = 0;
  virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
  Type *ScalarValTy) const = 0;
  const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
  unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const = 0;
  unsigned Index) = 0;
  unsigned Index) = 0;
  const APInt &DemandedDstElts,
  bool VariableMask, Align Alignment,
  bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
  std::optional<FastMathFlags> FMF,
  Type *ExpectedType) = 0;
  unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicElementSize) const = 0;
  unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
  unsigned SrcAlign, unsigned DestAlign,
  std::optional<uint32_t> AtomicCpySize) const = 0;
  unsigned AddrSpace) const = 0;
  unsigned AddrSpace) const = 0;
  unsigned ChainSizeInBytes,
  unsigned ChainSizeInBytes,
  Align Alignment) const = 0;
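  /// The Concept class above declares one pure-virtual entry point per TTI
  /// hook, and Model<T> below forwards every call to a concrete target
  /// implementation T. A minimal sketch of the same type-erasure idiom, using
  /// hypothetical names rather than the real interface, looks like this:
  /// \code
  ///   struct Concept {
  ///     virtual ~Concept() = default;
  ///     virtual unsigned getCacheLineSize() const = 0;
  ///   };
  ///   template <typename T> struct Model final : Concept {
  ///     T Impl;
  ///     Model(T Impl) : Impl(std::move(Impl)) {}
  ///     unsigned getCacheLineSize() const override {
  ///       return Impl.getCacheLineSize();
  ///     }
  ///   };
  /// \endcode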
template <typename T>
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }
  getGEPCost(Type *PointeeType, const Value *Ptr,
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const PointersChainInfo &Info,
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  InstructionCost getInstructionCost(const User *U,
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType) override {
    return Impl.isLegalMaskedCompressStore(DataType);
  }
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  }
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned *Fast) override {
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
      const APInt &Imm, Type *Ty,
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
      ArrayRef<const Value *> Args) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
  }
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
      const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     unsigned Index) override {
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           UseMaskForCond, UseMaskForGaps);
  }
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
  }
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
                          const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
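/// Example (illustrative sketch): the template constructor above is how a
/// backend hands its cost-model implementation to the analysis. A target hook
/// commonly looks roughly like the following, where "MyTargetTTIImpl" is a
/// placeholder for the target's TTI implementation class:
/// \code
///   TargetTransformInfo
///   MyTargetMachine::getTargetTransformInfo(const Function &F) const {
///     return TargetTransformInfo(MyTargetTTIImpl(this, F));
///   }
/// \endcode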
    : TTICallback(Arg.TTICallback) {}
    TTICallback = RHS.TTICallback;
    TTICallback = std::move(RHS.TTICallback);
  std::optional<TargetTransformInfo> TTI;
  virtual void anchor();
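/// Example (illustrative sketch): for legacy-pass-manager clients, the wrapper
/// pass is created from a TargetIRAnalysis; a default-constructed analysis
/// falls back to the conservative baseline implementation.
/// \code
///   legacy::PassManager PM;
///   PM.add(createTargetTransformInfoWrapperPass(TargetIRAnalysis()));
/// \endcode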