21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
44class BlockFrequencyInfo;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
64class TargetLibraryInfo;
122 Type *RetTy =
nullptr;
135 bool TypeBasedOnly =
false);
205class TargetTransformInfo;
330 static_assert(
sizeof(PointersChainInfo) == 4,
"Was size increase justified?");
338 const PointersChainInfo &
Info,
Type *AccessTy,
482 std::pair<const Value *, unsigned>
671 KnownBits & Known,
bool &KnownBitsComputed)
const;
678 SimplifyAndSetOp)
const;
712 bool HasBaseReg, int64_t Scale,
713 unsigned AddrSpace = 0,
824 int64_t BaseOffset,
bool HasBaseReg,
826 unsigned AddrSpace = 0)
const;
870 const APInt &DemandedElts,
871 bool Insert,
bool Extract,
935 bool IsZeroCmp)
const;
967 unsigned *
Fast =
nullptr)
const;
1148 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const;
1159 Type *ScalarValTy)
const;
1165 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const;
1212 unsigned NumStridedMemAccesses,
1213 unsigned NumPrefetches,
bool HasCall)
const;
1253 unsigned Opcode,
Type *Ty,
1257 ArrayRef<const Value *>
Args = ArrayRef<const Value *>(),
1258 const Instruction *CxtI =
nullptr,
1259 const TargetLibraryInfo *TLibInfo =
nullptr)
const;
1269 VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
1270 const SmallBitVector &OpcodeMask,
1283 ArrayRef<int> Mask = std::nullopt,
1286 ArrayRef<const Value *> Args = std::nullopt)
const;
1337 unsigned Index)
const;
1365 Value *Op1 =
nullptr)
const;
1375 unsigned Index = -1)
const;
1384 const APInt &DemandedDstElts,
1393 const Instruction *
I =
nullptr)
const;
1417 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
1431 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
1449 bool UseMaskForCond =
false,
bool UseMaskForGaps =
false)
const;
1454 return FMF && !(*FMF).allowReassoc();
1482 unsigned Opcode,
VectorType *Ty, std::optional<FastMathFlags> FMF,
1531 const SCEV *
Ptr =
nullptr)
const;
1555 Type *ExpectedType)
const;
1560 unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign,
1561 std::optional<uint32_t> AtomicElementSize = std::nullopt)
const;
1571 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
1572 unsigned SrcAlign,
unsigned DestAlign,
1573 std::optional<uint32_t> AtomicCpySize = std::nullopt)
const;
1587 unsigned DefaultCallPenalty)
const;
1623 unsigned AddrSpace)
const;
1627 unsigned AddrSpace)
const;
1639 unsigned ChainSizeInBytes,
1645 unsigned ChainSizeInBytes,
1705 Align Alignment)
const;
1764 template <
typename T>
class Model;
1766 std::unique_ptr<Concept> TTIImpl;
1812 virtual std::pair<const Value *, unsigned>
1816 Value *NewV)
const = 0;
1834 KnownBits & Known,
bool &KnownBitsComputed) = 0;
1839 SimplifyAndSetOp) = 0;
1843 int64_t BaseOffset,
bool HasBaseReg,
1844 int64_t Scale,
unsigned AddrSpace,
1866 Align Alignment) = 0;
1868 Align Alignment) = 0;
1881 bool HasBaseReg, int64_t Scale,
1882 unsigned AddrSpace) = 0;
1894 const APInt &DemandedElts,
1895 bool Insert,
bool Extract,
1916 unsigned *
Fast) = 0;
1937 Type *Ty =
nullptr)
const = 0;
1947 bool IsScalable)
const = 0;
1948 virtual unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const = 0;
1950 Type *ScalarValTy)
const = 0;
1952 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader) = 0;
1971 unsigned NumStridedMemAccesses,
1972 unsigned NumPrefetches,
1973 bool HasCall)
const = 0;
1992 VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
2007 unsigned Index) = 0;
2022 unsigned Index) = 0;
2026 const APInt &DemandedDstElts,
2044 bool VariableMask,
Align Alignment,
2049 bool VariableMask,
Align Alignment,
2056 bool UseMaskForCond =
false,
bool UseMaskForGaps =
false) = 0;
2059 std::optional<FastMathFlags> FMF,
2086 Type *ExpectedType) = 0;
2089 unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign,
2090 std::optional<uint32_t> AtomicElementSize)
const = 0;
2094 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
2095 unsigned SrcAlign,
unsigned DestAlign,
2096 std::optional<uint32_t> AtomicCpySize)
const = 0;
2100 unsigned DefaultCallPenalty)
const = 0;
2111 unsigned AddrSpace)
const = 0;
2114 unsigned AddrSpace)
const = 0;
2119 unsigned ChainSizeInBytes,
2122 unsigned ChainSizeInBytes,
2136 Align Alignment)
const = 0;
2143template <
typename T>
2148 Model(
T Impl) : Impl(std::move(Impl)) {}
2149 ~Model()
override =
default;
2151 const DataLayout &getDataLayout()
const override {
2152 return Impl.getDataLayout();
2156 getGEPCost(Type *PointeeType,
const Value *
Ptr,
2157 ArrayRef<const Value *>
Operands, Type *AccessType,
2161 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2163 const PointersChainInfo &
Info,
2168 unsigned getInliningThresholdMultiplier()
const override {
2169 return Impl.getInliningThresholdMultiplier();
2171 unsigned adjustInliningThreshold(
const CallBase *CB)
override {
2172 return Impl.adjustInliningThreshold(CB);
2174 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const override {
2175 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2177 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const override {
2178 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2180 int getInlinerVectorBonusPercent()
const override {
2181 return Impl.getInlinerVectorBonusPercent();
2183 unsigned getCallerAllocaCost(
const CallBase *CB,
2184 const AllocaInst *AI)
const override {
2185 return Impl.getCallerAllocaCost(CB, AI);
2187 InstructionCost getMemcpyCost(
const Instruction *
I)
override {
2188 return Impl.getMemcpyCost(
I);
2191 uint64_t getMaxMemIntrinsicInlineSizeThreshold()
const override {
2192 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2195 InstructionCost getInstructionCost(
const User *U,
2200 BranchProbability getPredictableBranchThreshold()
override {
2201 return Impl.getPredictableBranchThreshold();
2203 bool hasBranchDivergence(
const Function *
F =
nullptr)
override {
2204 return Impl.hasBranchDivergence(
F);
2206 bool isSourceOfDivergence(
const Value *V)
override {
2207 return Impl.isSourceOfDivergence(V);
2210 bool isAlwaysUniform(
const Value *V)
override {
2211 return Impl.isAlwaysUniform(V);
2214 bool isValidAddrSpaceCast(
unsigned FromAS,
unsigned ToAS)
const override {
2215 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2218 bool addrspacesMayAlias(
unsigned AS0,
unsigned AS1)
const override {
2219 return Impl.addrspacesMayAlias(AS0, AS1);
2222 unsigned getFlatAddressSpace()
override {
return Impl.getFlatAddressSpace(); }
2224 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2226 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2229 bool isNoopAddrSpaceCast(
unsigned FromAS,
unsigned ToAS)
const override {
2230 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2234 canHaveNonUndefGlobalInitializerInAddressSpace(
unsigned AS)
const override {
2235 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2238 unsigned getAssumedAddrSpace(
const Value *V)
const override {
2239 return Impl.getAssumedAddrSpace(V);
2242 bool isSingleThreaded()
const override {
return Impl.isSingleThreaded(); }
2244 std::pair<const Value *, unsigned>
2245 getPredicatedAddrSpace(
const Value *V)
const override {
2246 return Impl.getPredicatedAddrSpace(V);
2249 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2250 Value *NewV)
const override {
2251 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2254 bool isLoweredToCall(
const Function *
F)
override {
2255 return Impl.isLoweredToCall(
F);
2257 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2258 UnrollingPreferences &UP,
2259 OptimizationRemarkEmitter *ORE)
override {
2260 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2262 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2263 PeelingPreferences &PP)
override {
2264 return Impl.getPeelingPreferences(L, SE, PP);
2266 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2267 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2268 HardwareLoopInfo &HWLoopInfo)
override {
2269 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2271 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
override {
2272 return Impl.preferPredicateOverEpilogue(TFI);
2275 getPreferredTailFoldingStyle(
bool IVUpdateMayOverflow =
true)
override {
2276 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2278 std::optional<Instruction *>
2279 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
override {
2280 return Impl.instCombineIntrinsic(IC, II);
2282 std::optional<Value *>
2283 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2284 APInt DemandedMask, KnownBits &Known,
2285 bool &KnownBitsComputed)
override {
2286 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2289 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2290 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2291 APInt &UndefElts2, APInt &UndefElts3,
2292 std::function<
void(Instruction *,
unsigned, APInt, APInt &)>
2293 SimplifyAndSetOp)
override {
2294 return Impl.simplifyDemandedVectorEltsIntrinsic(
2295 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2298 bool isLegalAddImmediate(int64_t Imm)
override {
2299 return Impl.isLegalAddImmediate(Imm);
2301 bool isLegalICmpImmediate(int64_t Imm)
override {
2302 return Impl.isLegalICmpImmediate(Imm);
2304 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2305 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
2306 Instruction *
I)
override {
2307 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2310 bool isLSRCostLess(
const TargetTransformInfo::LSRCost &C1,
2311 const TargetTransformInfo::LSRCost &C2)
override {
2312 return Impl.isLSRCostLess(C1, C2);
2314 bool isNumRegsMajorCostOfLSR()
override {
2315 return Impl.isNumRegsMajorCostOfLSR();
2317 bool shouldFoldTerminatingConditionAfterLSR()
const override {
2318 return Impl.shouldFoldTerminatingConditionAfterLSR();
2320 bool isProfitableLSRChainElement(Instruction *
I)
override {
2321 return Impl.isProfitableLSRChainElement(
I);
2323 bool canMacroFuseCmp()
override {
return Impl.canMacroFuseCmp(); }
2324 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2325 DominatorTree *DT, AssumptionCache *AC,
2326 TargetLibraryInfo *LibInfo)
override {
2327 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2330 getPreferredAddressingMode(
const Loop *L,
2331 ScalarEvolution *SE)
const override {
2332 return Impl.getPreferredAddressingMode(L, SE);
2334 bool isLegalMaskedStore(Type *DataType, Align Alignment)
override {
2335 return Impl.isLegalMaskedStore(DataType, Alignment);
2337 bool isLegalMaskedLoad(Type *DataType, Align Alignment)
override {
2338 return Impl.isLegalMaskedLoad(DataType, Alignment);
2340 bool isLegalNTStore(Type *DataType, Align Alignment)
override {
2341 return Impl.isLegalNTStore(DataType, Alignment);
2343 bool isLegalNTLoad(Type *DataType, Align Alignment)
override {
2344 return Impl.isLegalNTLoad(DataType, Alignment);
2346 bool isLegalBroadcastLoad(Type *ElementTy,
2347 ElementCount NumElements)
const override {
2348 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2350 bool isLegalMaskedScatter(Type *DataType, Align Alignment)
override {
2351 return Impl.isLegalMaskedScatter(DataType, Alignment);
2353 bool isLegalMaskedGather(Type *DataType, Align Alignment)
override {
2354 return Impl.isLegalMaskedGather(DataType, Alignment);
2356 bool forceScalarizeMaskedGather(
VectorType *DataType,
2357 Align Alignment)
override {
2358 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2360 bool forceScalarizeMaskedScatter(
VectorType *DataType,
2361 Align Alignment)
override {
2362 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2364 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
override {
2365 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2367 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
override {
2368 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2370 bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
override {
2371 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2373 bool isLegalAltInstr(
VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
2374 const SmallBitVector &OpcodeMask)
const override {
2375 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2377 bool enableOrderedReductions()
override {
2378 return Impl.enableOrderedReductions();
2380 bool hasDivRemOp(Type *DataType,
bool IsSigned)
override {
2381 return Impl.hasDivRemOp(DataType, IsSigned);
2383 bool hasVolatileVariant(Instruction *
I,
unsigned AddrSpace)
override {
2384 return Impl.hasVolatileVariant(
I, AddrSpace);
2386 bool prefersVectorizedAddressing()
override {
2387 return Impl.prefersVectorizedAddressing();
2389 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2390 int64_t BaseOffset,
bool HasBaseReg,
2392 unsigned AddrSpace)
override {
2393 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2396 bool LSRWithInstrQueries()
override {
return Impl.LSRWithInstrQueries(); }
2397 bool isTruncateFree(Type *Ty1, Type *Ty2)
override {
2398 return Impl.isTruncateFree(Ty1, Ty2);
2400 bool isProfitableToHoist(Instruction *
I)
override {
2401 return Impl.isProfitableToHoist(
I);
2403 bool useAA()
override {
return Impl.useAA(); }
2404 bool isTypeLegal(Type *Ty)
override {
return Impl.isTypeLegal(Ty); }
2405 unsigned getRegUsageForType(Type *Ty)
override {
2406 return Impl.getRegUsageForType(Ty);
2408 bool shouldBuildLookupTables()
override {
2409 return Impl.shouldBuildLookupTables();
2411 bool shouldBuildLookupTablesForConstant(Constant *
C)
override {
2412 return Impl.shouldBuildLookupTablesForConstant(
C);
2414 bool shouldBuildRelLookupTables()
override {
2415 return Impl.shouldBuildRelLookupTables();
2417 bool useColdCCForColdCall(Function &
F)
override {
2418 return Impl.useColdCCForColdCall(
F);
2421 InstructionCost getScalarizationOverhead(
VectorType *Ty,
2422 const APInt &DemandedElts,
2423 bool Insert,
bool Extract,
2425 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2429 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2430 ArrayRef<Type *> Tys,
2432 return Impl.getOperandsScalarizationOverhead(Args, Tys,
CostKind);
2435 bool supportsEfficientVectorElementLoadStore()
override {
2436 return Impl.supportsEfficientVectorElementLoadStore();
2439 bool supportsTailCalls()
override {
return Impl.supportsTailCalls(); }
2440 bool supportsTailCallFor(
const CallBase *CB)
override {
2441 return Impl.supportsTailCallFor(CB);
2444 bool enableAggressiveInterleaving(
bool LoopHasReductions)
override {
2445 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2447 MemCmpExpansionOptions enableMemCmpExpansion(
bool OptSize,
2448 bool IsZeroCmp)
const override {
2449 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2451 bool enableSelectOptimize()
override {
2452 return Impl.enableSelectOptimize();
2454 bool shouldTreatInstructionLikeSelect(
const Instruction *
I)
override {
2455 return Impl.shouldTreatInstructionLikeSelect(
I);
2457 bool enableInterleavedAccessVectorization()
override {
2458 return Impl.enableInterleavedAccessVectorization();
2460 bool enableMaskedInterleavedAccessVectorization()
override {
2461 return Impl.enableMaskedInterleavedAccessVectorization();
2463 bool isFPVectorizationPotentiallyUnsafe()
override {
2464 return Impl.isFPVectorizationPotentiallyUnsafe();
2466 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
2468 unsigned *
Fast)
override {
2473 return Impl.getPopcntSupport(IntTyWidthInBit);
2475 bool haveFastSqrt(Type *Ty)
override {
return Impl.haveFastSqrt(Ty); }
2477 bool isExpensiveToSpeculativelyExecute(
const Instruction*
I)
override {
2478 return Impl.isExpensiveToSpeculativelyExecute(
I);
2481 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
override {
2482 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2485 InstructionCost getFPOpCost(Type *Ty)
override {
2486 return Impl.getFPOpCost(Ty);
2489 InstructionCost getIntImmCodeSizeCost(
unsigned Opc,
unsigned Idx,
2490 const APInt &Imm, Type *Ty)
override {
2491 return Impl.getIntImmCodeSizeCost(Opc,
Idx, Imm, Ty);
2493 InstructionCost getIntImmCost(
const APInt &Imm, Type *Ty,
2495 return Impl.getIntImmCost(Imm, Ty,
CostKind);
2497 InstructionCost getIntImmCostInst(
unsigned Opc,
unsigned Idx,
2498 const APInt &Imm, Type *Ty,
2500 Instruction *Inst =
nullptr)
override {
2501 return Impl.getIntImmCostInst(Opc,
Idx, Imm, Ty,
CostKind, Inst);
2504 const APInt &Imm, Type *Ty,
2506 return Impl.getIntImmCostIntrin(IID,
Idx, Imm, Ty,
CostKind);
2508 bool preferToKeepConstantsAttached(
const Instruction &Inst,
2509 const Function &Fn)
const override {
2510 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2512 unsigned getNumberOfRegisters(
unsigned ClassID)
const override {
2513 return Impl.getNumberOfRegisters(ClassID);
2515 unsigned getRegisterClassForType(
bool Vector,
2516 Type *Ty =
nullptr)
const override {
2517 return Impl.getRegisterClassForType(
Vector, Ty);
2519 const char *getRegisterClassName(
unsigned ClassID)
const override {
2520 return Impl.getRegisterClassName(ClassID);
2522 TypeSize getRegisterBitWidth(
RegisterKind K)
const override {
2523 return Impl.getRegisterBitWidth(K);
2525 unsigned getMinVectorRegisterBitWidth()
const override {
2526 return Impl.getMinVectorRegisterBitWidth();
2528 std::optional<unsigned>
getMaxVScale()
const override {
2529 return Impl.getMaxVScale();
2531 std::optional<unsigned> getVScaleForTuning()
const override {
2532 return Impl.getVScaleForTuning();
2534 bool isVScaleKnownToBeAPowerOfTwo()
const override {
2535 return Impl.isVScaleKnownToBeAPowerOfTwo();
2537 bool shouldMaximizeVectorBandwidth(
2539 return Impl.shouldMaximizeVectorBandwidth(K);
2541 ElementCount getMinimumVF(
unsigned ElemWidth,
2542 bool IsScalable)
const override {
2543 return Impl.getMinimumVF(ElemWidth, IsScalable);
2545 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const override {
2546 return Impl.getMaximumVF(ElemWidth, Opcode);
2548 unsigned getStoreMinimumVF(
unsigned VF, Type *ScalarMemTy,
2549 Type *ScalarValTy)
const override {
2550 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2552 bool shouldConsiderAddressTypePromotion(
2553 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
override {
2554 return Impl.shouldConsiderAddressTypePromotion(
2555 I, AllowPromotionWithoutCommonHeader);
2557 unsigned getCacheLineSize()
const override {
return Impl.getCacheLineSize(); }
2558 std::optional<unsigned> getCacheSize(
CacheLevel Level)
const override {
2559 return Impl.getCacheSize(Level);
2561 std::optional<unsigned>
2562 getCacheAssociativity(
CacheLevel Level)
const override {
2563 return Impl.getCacheAssociativity(Level);
2566 std::optional<unsigned> getMinPageSize()
const override {
2567 return Impl.getMinPageSize();
2572 unsigned getPrefetchDistance()
const override {
2573 return Impl.getPrefetchDistance();
2579 unsigned getMinPrefetchStride(
unsigned NumMemAccesses,
2580 unsigned NumStridedMemAccesses,
2581 unsigned NumPrefetches,
2582 bool HasCall)
const override {
2583 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2584 NumPrefetches, HasCall);
2590 unsigned getMaxPrefetchIterationsAhead()
const override {
2591 return Impl.getMaxPrefetchIterationsAhead();
2595 bool enableWritePrefetching()
const override {
2596 return Impl.enableWritePrefetching();
2600 bool shouldPrefetchAddressSpace(
unsigned AS)
const override {
2601 return Impl.shouldPrefetchAddressSpace(AS);
2604 unsigned getMaxInterleaveFactor(ElementCount VF)
override {
2605 return Impl.getMaxInterleaveFactor(VF);
2607 unsigned getEstimatedNumberOfCaseClusters(
const SwitchInst &SI,
2609 ProfileSummaryInfo *PSI,
2610 BlockFrequencyInfo *BFI)
override {
2611 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2613 InstructionCost getArithmeticInstrCost(
2615 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2616 ArrayRef<const Value *> Args,
2617 const Instruction *CxtI =
nullptr)
override {
2618 return Impl.getArithmeticInstrCost(Opcode, Ty,
CostKind, Opd1Info, Opd2Info,
2621 InstructionCost getAltInstrCost(
VectorType *VecTy,
unsigned Opcode0,
2623 const SmallBitVector &OpcodeMask,
2625 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask,
CostKind);
2632 ArrayRef<const Value *> Args)
override {
2633 return Impl.getShuffleCost(Kind, Tp, Mask,
CostKind,
Index, SubTp, Args);
2635 InstructionCost getCastInstrCost(
unsigned Opcode, Type *Dst, Type *Src,
2638 const Instruction *
I)
override {
2639 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH,
CostKind,
I);
2641 InstructionCost getExtractWithExtendCost(
unsigned Opcode, Type *Dst,
2643 unsigned Index)
override {
2644 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy,
Index);
2647 const Instruction *
I =
nullptr)
override {
2648 return Impl.getCFInstrCost(Opcode,
CostKind,
I);
2650 InstructionCost getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy,
2653 const Instruction *
I)
override {
2654 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred,
CostKind,
I);
2656 InstructionCost getVectorInstrCost(
unsigned Opcode, Type *Val,
2658 unsigned Index, Value *Op0,
2659 Value *Op1)
override {
2660 return Impl.getVectorInstrCost(Opcode, Val,
CostKind,
Index, Op0, Op1);
2662 InstructionCost getVectorInstrCost(
const Instruction &
I, Type *Val,
2664 unsigned Index)
override {
2668 getReplicationShuffleCost(Type *EltTy,
int ReplicationFactor,
int VF,
2669 const APInt &DemandedDstElts,
2671 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2674 InstructionCost getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment,
2677 OperandValueInfo OpInfo,
2678 const Instruction *
I)
override {
2682 InstructionCost getVPMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment,
2685 const Instruction *
I)
override {
2686 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment,
AddressSpace,
2689 InstructionCost getMaskedMemoryOpCost(
unsigned Opcode, Type *Src,
2692 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment,
AddressSpace,
2696 getGatherScatterOpCost(
unsigned Opcode, Type *DataTy,
const Value *
Ptr,
2697 bool VariableMask, Align Alignment,
2699 const Instruction *
I =
nullptr)
override {
2700 return Impl.getGatherScatterOpCost(Opcode, DataTy,
Ptr, VariableMask,
2704 getStridedMemoryOpCost(
unsigned Opcode, Type *DataTy,
const Value *
Ptr,
2705 bool VariableMask, Align Alignment,
2707 const Instruction *
I =
nullptr)
override {
2708 return Impl.getStridedMemoryOpCost(Opcode, DataTy,
Ptr, VariableMask,
2711 InstructionCost getInterleavedMemoryOpCost(
2712 unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
2714 bool UseMaskForCond,
bool UseMaskForGaps)
override {
2715 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2717 UseMaskForCond, UseMaskForGaps);
2720 getArithmeticReductionCost(
unsigned Opcode,
VectorType *Ty,
2721 std::optional<FastMathFlags> FMF,
2723 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF,
CostKind);
2728 return Impl.getMinMaxReductionCost(IID, Ty, FMF,
CostKind);
2731 getExtendedReductionCost(
unsigned Opcode,
bool IsUnsigned, Type *ResTy,
2734 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2738 getMulAccReductionCost(
bool IsUnsigned, Type *ResTy,
VectorType *Ty,
2740 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty,
CostKind);
2742 InstructionCost getIntrinsicInstrCost(
const IntrinsicCostAttributes &ICA,
2744 return Impl.getIntrinsicInstrCost(ICA,
CostKind);
2746 InstructionCost getCallInstrCost(Function *
F, Type *
RetTy,
2747 ArrayRef<Type *> Tys,
2751 unsigned getNumberOfParts(Type *Tp)
override {
2752 return Impl.getNumberOfParts(Tp);
2754 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2755 const SCEV *
Ptr)
override {
2756 return Impl.getAddressComputationCost(Ty, SE,
Ptr);
2758 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys)
override {
2759 return Impl.getCostOfKeepingLiveOverCall(Tys);
2761 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2762 MemIntrinsicInfo &
Info)
override {
2763 return Impl.getTgtMemIntrinsic(Inst,
Info);
2765 unsigned getAtomicMemIntrinsicMaxElementSize()
const override {
2766 return Impl.getAtomicMemIntrinsicMaxElementSize();
2768 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2769 Type *ExpectedType)
override {
2770 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2772 Type *getMemcpyLoopLoweringType(
2773 LLVMContext &Context, Value *
Length,
unsigned SrcAddrSpace,
2774 unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign,
2775 std::optional<uint32_t> AtomicElementSize)
const override {
2776 return Impl.getMemcpyLoopLoweringType(Context,
Length, SrcAddrSpace,
2777 DestAddrSpace, SrcAlign, DestAlign,
2780 void getMemcpyLoopResidualLoweringType(
2781 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2782 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
2783 unsigned SrcAlign,
unsigned DestAlign,
2784 std::optional<uint32_t> AtomicCpySize)
const override {
2785 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2786 SrcAddrSpace, DestAddrSpace,
2787 SrcAlign, DestAlign, AtomicCpySize);
2790 const Function *Callee)
const override {
2791 return Impl.areInlineCompatible(Caller, Callee);
2793 unsigned getInlineCallPenalty(
const Function *
F,
const CallBase &Call,
2794 unsigned DefaultCallPenalty)
const override {
2795 return Impl.getInlineCallPenalty(
F, Call, DefaultCallPenalty);
2797 bool areTypesABICompatible(
const Function *Caller,
const Function *Callee,
2798 const ArrayRef<Type *> &Types)
const override {
2799 return Impl.areTypesABICompatible(Caller, Callee, Types);
2802 return Impl.isIndexedLoadLegal(
Mode, Ty, getDataLayout());
2805 return Impl.isIndexedStoreLegal(
Mode, Ty, getDataLayout());
2807 unsigned getLoadStoreVecRegBitWidth(
unsigned AddrSpace)
const override {
2808 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2810 bool isLegalToVectorizeLoad(LoadInst *LI)
const override {
2811 return Impl.isLegalToVectorizeLoad(LI);
2813 bool isLegalToVectorizeStore(StoreInst *SI)
const override {
2814 return Impl.isLegalToVectorizeStore(SI);
2816 bool isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes, Align Alignment,
2817 unsigned AddrSpace)
const override {
2818 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2821 bool isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes, Align Alignment,
2822 unsigned AddrSpace)
const override {
2823 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2826 bool isLegalToVectorizeReduction(
const RecurrenceDescriptor &RdxDesc,
2827 ElementCount VF)
const override {
2828 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2830 bool isElementTypeLegalForScalableVector(Type *Ty)
const override {
2831 return Impl.isElementTypeLegalForScalableVector(Ty);
2833 unsigned getLoadVectorFactor(
unsigned VF,
unsigned LoadSize,
2834 unsigned ChainSizeInBytes,
2836 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2838 unsigned getStoreVectorFactor(
unsigned VF,
unsigned StoreSize,
2839 unsigned ChainSizeInBytes,
2841 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2843 bool preferInLoopReduction(
unsigned Opcode, Type *Ty,
2844 ReductionFlags Flags)
const override {
2845 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2847 bool preferPredicatedReductionSelect(
unsigned Opcode, Type *Ty,
2848 ReductionFlags Flags)
const override {
2849 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2851 bool preferEpilogueVectorization()
const override {
2852 return Impl.preferEpilogueVectorization();
2855 bool shouldExpandReduction(
const IntrinsicInst *II)
const override {
2856 return Impl.shouldExpandReduction(II);
2859 unsigned getGISelRematGlobalCost()
const override {
2860 return Impl.getGISelRematGlobalCost();
2863 unsigned getMinTripCountTailFoldingThreshold()
const override {
2864 return Impl.getMinTripCountTailFoldingThreshold();
2867 bool supportsScalableVectors()
const override {
2868 return Impl.supportsScalableVectors();
2871 bool enableScalableVectorization()
const override {
2872 return Impl.enableScalableVectorization();
2875 bool hasActiveVectorLength(
unsigned Opcode, Type *DataType,
2876 Align Alignment)
const override {
2877 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2881 getVPLegalizationStrategy(
const VPIntrinsic &PI)
const override {
2882 return Impl.getVPLegalizationStrategy(PI);
2885 bool hasArmWideBranch(
bool Thumb)
const override {
2886 return Impl.hasArmWideBranch(Thumb);
2889 unsigned getMaxNumArgs()
const override {
2890 return Impl.getMaxNumArgs();
2894template <
typename T>
2896 : TTIImpl(new Model<
T>(Impl)) {}
2927 : TTICallback(Arg.TTICallback) {}
2929 : TTICallback(
std::
move(Arg.TTICallback)) {}
2931 TTICallback =
RHS.TTICallback;
2935 TTICallback = std::move(
RHS.TTICallback);
2967 std::optional<TargetTransformInfo>
TTI;
2969 virtual void anchor();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMDGPU Lower Kernel Arguments
Atomic ordering constants.
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
mir Rename Register Operands
This header defines various interfaces for pass management in LLVM.
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
This file implements the SmallBitVector class.
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
API to communicate dependencies between analyses during invalidation.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to calling a function.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is an important base class in LLVM.
A parsed version of the target data layout string, and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
ImmutablePass class - This class is used to provide information that does not need to be run.
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Drive the analysis of interleaved memory accesses in the loop.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
bool skipScalarizationCost() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor.
Represents a single loop in the control flow graph.
A set of analyses that are preserved following a run of a transformation pass.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Base class of all SIMD vector types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
@ C
The default llvm calling convention, compatible with C.
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a signed Int and unsigned UInt variant in order to map directly to C++ types.
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
AtomicOrdering
Atomic ordering for LLVM's memory model.
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
constexpr unsigned BitWidth
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime overflow check can be avoided.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements it with a splat of the trip count compared to a stepvector.
Implement std::hash so that hash_code can be used in STL containers.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A CRTP mix-in that provides informational APIs needed for analysis passes.
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
HardwareLoopInfo()=delete
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
unsigned short MatchingId
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
LoopVectorizationLegality * LVL