21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
44class BlockFrequencyInfo;
51class OptimizationRemarkEmitter;
52class InterleavedAccessInfo;
57class LoopVectorizationLegality;
58class ProfileSummaryInfo;
59class RecurrenceDescriptor;
64class TargetLibraryInfo;
122 Type *RetTy =
nullptr;
135 bool TypeBasedOnly =
false);
208class TargetTransformInfo;
333 static_assert(
sizeof(PointersChainInfo) == 4,
"Was size increase justified?");
341 const PointersChainInfo &
Info,
Type *AccessTy,
485 std::pair<const Value *, unsigned>
674 KnownBits & Known,
bool &KnownBitsComputed)
const;
681 SimplifyAndSetOp)
const;
725 bool HasBaseReg, int64_t Scale,
727 int64_t ScalableOffset = 0)
const;
842 unsigned AddrSpace = 0)
const;
886 const APInt &DemandedElts,
887 bool Insert,
bool Extract,
951 bool IsZeroCmp)
const;
983 unsigned *
Fast =
nullptr)
const;
1164 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const;
1175 Type *ScalarValTy)
const;
1181 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
const;
1228 unsigned NumStridedMemAccesses,
1229 unsigned NumPrefetches,
bool HasCall)
const;
1269 unsigned Opcode,
Type *Ty,
1273 ArrayRef<const Value *>
Args = std::nullopt,
1274 const Instruction *CxtI =
nullptr,
1275 const TargetLibraryInfo *TLibInfo =
nullptr)
const;
1285 VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
1286 const SmallBitVector &OpcodeMask,
1300 VectorType *SubTp =
nullptr, ArrayRef<const Value *> Args = std::nullopt,
1301 const Instruction *CxtI =
nullptr)
const;
1352 unsigned Index)
const;
1380 Value *Op1 =
nullptr)
const;
1390 unsigned Index = -1)
const;
1399 const APInt &DemandedDstElts,
1408 const Instruction *
I =
nullptr)
const;
1432 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
1446 unsigned Opcode,
Type *DataTy,
const Value *
Ptr,
bool VariableMask,
1464 bool UseMaskForCond =
false,
bool UseMaskForGaps =
false)
const;
1469 return FMF && !(*FMF).allowReassoc();
1497 unsigned Opcode,
VectorType *Ty, std::optional<FastMathFlags> FMF,
1546 const SCEV *
Ptr =
nullptr)
const;
1570 Type *ExpectedType)
const;
1575 unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign,
1576 std::optional<uint32_t> AtomicElementSize = std::nullopt)
const;
1586 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
1587 unsigned SrcAlign,
unsigned DestAlign,
1588 std::optional<uint32_t> AtomicCpySize = std::nullopt)
const;
1602 unsigned DefaultCallPenalty)
const;
1638 unsigned AddrSpace)
const;
1642 unsigned AddrSpace)
const;
1654 unsigned ChainSizeInBytes,
1660 unsigned ChainSizeInBytes,
1720 Align Alignment)
const;
1779 template <
typename T>
class Model;
1781 std::unique_ptr<Concept> TTIImpl;
1827 virtual std::pair<const Value *, unsigned>
1831 Value *NewV)
const = 0;
1849 KnownBits & Known,
bool &KnownBitsComputed) = 0;
1854 SimplifyAndSetOp) = 0;
1859 int64_t BaseOffset,
bool HasBaseReg,
1860 int64_t Scale,
unsigned AddrSpace,
1862 int64_t ScalableOffset) = 0;
1883 Align Alignment) = 0;
1885 Align Alignment) = 0;
1899 bool HasBaseReg, int64_t Scale,
1900 unsigned AddrSpace) = 0;
1912 const APInt &DemandedElts,
1913 bool Insert,
bool Extract,
1934 unsigned *
Fast) = 0;
1955 Type *Ty =
nullptr)
const = 0;
1965 bool IsScalable)
const = 0;
1966 virtual unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const = 0;
1968 Type *ScalarValTy)
const = 0;
1970 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader) = 0;
1989 unsigned NumStridedMemAccesses,
1990 unsigned NumPrefetches,
1991 bool HasCall)
const = 0;
2010 VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
2024 unsigned Index) = 0;
2039 unsigned Index) = 0;
2043 const APInt &DemandedDstElts,
2061 bool VariableMask,
Align Alignment,
2066 bool VariableMask,
Align Alignment,
2073 bool UseMaskForCond =
false,
bool UseMaskForGaps =
false) = 0;
2076 std::optional<FastMathFlags> FMF,
2103 Type *ExpectedType) = 0;
2106 unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign,
2107 std::optional<uint32_t> AtomicElementSize)
const = 0;
2111 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
2112 unsigned SrcAlign,
unsigned DestAlign,
2113 std::optional<uint32_t> AtomicCpySize)
const = 0;
2117 unsigned DefaultCallPenalty)
const = 0;
2128 unsigned AddrSpace)
const = 0;
2131 unsigned AddrSpace)
const = 0;
2136 unsigned ChainSizeInBytes,
2139 unsigned ChainSizeInBytes,
2153 Align Alignment)
const = 0;
2160template <
typename T>
2165 Model(
T Impl) : Impl(std::move(Impl)) {}
2166 ~Model()
override =
default;
2168 const DataLayout &getDataLayout()
const override {
2169 return Impl.getDataLayout();
2173 getGEPCost(Type *PointeeType,
const Value *
Ptr,
2174 ArrayRef<const Value *>
Operands, Type *AccessType,
2178 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2180 const PointersChainInfo &
Info,
2185 unsigned getInliningThresholdMultiplier()
const override {
2186 return Impl.getInliningThresholdMultiplier();
2188 unsigned adjustInliningThreshold(
const CallBase *CB)
override {
2189 return Impl.adjustInliningThreshold(CB);
2191 unsigned getInliningCostBenefitAnalysisSavingsMultiplier()
const override {
2192 return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
2194 unsigned getInliningCostBenefitAnalysisProfitableMultiplier()
const override {
2195 return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
2197 int getInlinerVectorBonusPercent()
const override {
2198 return Impl.getInlinerVectorBonusPercent();
2200 unsigned getCallerAllocaCost(
const CallBase *CB,
2201 const AllocaInst *AI)
const override {
2202 return Impl.getCallerAllocaCost(CB, AI);
2204 InstructionCost getMemcpyCost(
const Instruction *
I)
override {
2205 return Impl.getMemcpyCost(
I);
2208 uint64_t getMaxMemIntrinsicInlineSizeThreshold()
const override {
2209 return Impl.getMaxMemIntrinsicInlineSizeThreshold();
2212 InstructionCost getInstructionCost(
const User *U,
2217 BranchProbability getPredictableBranchThreshold()
override {
2218 return Impl.getPredictableBranchThreshold();
2220 bool hasBranchDivergence(
const Function *
F =
nullptr)
override {
2221 return Impl.hasBranchDivergence(
F);
2223 bool isSourceOfDivergence(
const Value *V)
override {
2224 return Impl.isSourceOfDivergence(V);
2227 bool isAlwaysUniform(
const Value *V)
override {
2228 return Impl.isAlwaysUniform(V);
2231 bool isValidAddrSpaceCast(
unsigned FromAS,
unsigned ToAS)
const override {
2232 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2235 bool addrspacesMayAlias(
unsigned AS0,
unsigned AS1)
const override {
2236 return Impl.addrspacesMayAlias(AS0, AS1);
2239 unsigned getFlatAddressSpace()
override {
return Impl.getFlatAddressSpace(); }
2241 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2243 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2246 bool isNoopAddrSpaceCast(
unsigned FromAS,
unsigned ToAS)
const override {
2247 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2251 canHaveNonUndefGlobalInitializerInAddressSpace(
unsigned AS)
const override {
2252 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2255 unsigned getAssumedAddrSpace(
const Value *V)
const override {
2256 return Impl.getAssumedAddrSpace(V);
2259 bool isSingleThreaded()
const override {
return Impl.isSingleThreaded(); }
2261 std::pair<const Value *, unsigned>
2262 getPredicatedAddrSpace(
const Value *V)
const override {
2263 return Impl.getPredicatedAddrSpace(V);
2266 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2267 Value *NewV)
const override {
2268 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2271 bool isLoweredToCall(
const Function *
F)
override {
2272 return Impl.isLoweredToCall(
F);
2274 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2275 UnrollingPreferences &UP,
2276 OptimizationRemarkEmitter *ORE)
override {
2277 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2279 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2280 PeelingPreferences &PP)
override {
2281 return Impl.getPeelingPreferences(L, SE, PP);
2283 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2284 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2285 HardwareLoopInfo &HWLoopInfo)
override {
2286 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2288 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI)
override {
2289 return Impl.preferPredicateOverEpilogue(TFI);
2292 getPreferredTailFoldingStyle(
bool IVUpdateMayOverflow =
true)
override {
2293 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2295 std::optional<Instruction *>
2296 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
override {
2297 return Impl.instCombineIntrinsic(IC, II);
2299 std::optional<Value *>
2300 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2301 APInt DemandedMask, KnownBits &Known,
2302 bool &KnownBitsComputed)
override {
2303 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2306 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2307 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2308 APInt &UndefElts2, APInt &UndefElts3,
2309 std::function<
void(Instruction *,
unsigned, APInt, APInt &)>
2310 SimplifyAndSetOp)
override {
2311 return Impl.simplifyDemandedVectorEltsIntrinsic(
2312 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2315 bool isLegalAddImmediate(int64_t Imm)
override {
2316 return Impl.isLegalAddImmediate(Imm);
2318 bool isLegalAddScalableImmediate(int64_t Imm)
override {
2319 return Impl.isLegalAddScalableImmediate(Imm);
2321 bool isLegalICmpImmediate(int64_t Imm)
override {
2322 return Impl.isLegalICmpImmediate(Imm);
2324 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2325 bool HasBaseReg, int64_t Scale,
unsigned AddrSpace,
2326 Instruction *
I, int64_t ScalableOffset)
override {
2327 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2328 AddrSpace,
I, ScalableOffset);
2330 bool isLSRCostLess(
const TargetTransformInfo::LSRCost &C1,
2331 const TargetTransformInfo::LSRCost &C2)
override {
2332 return Impl.isLSRCostLess(C1, C2);
2334 bool isNumRegsMajorCostOfLSR()
override {
2335 return Impl.isNumRegsMajorCostOfLSR();
2337 bool shouldFoldTerminatingConditionAfterLSR()
const override {
2338 return Impl.shouldFoldTerminatingConditionAfterLSR();
2340 bool isProfitableLSRChainElement(Instruction *
I)
override {
2341 return Impl.isProfitableLSRChainElement(
I);
2343 bool canMacroFuseCmp()
override {
return Impl.canMacroFuseCmp(); }
2344 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2345 DominatorTree *DT, AssumptionCache *AC,
2346 TargetLibraryInfo *LibInfo)
override {
2347 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2350 getPreferredAddressingMode(
const Loop *L,
2351 ScalarEvolution *SE)
const override {
2352 return Impl.getPreferredAddressingMode(L, SE);
2354 bool isLegalMaskedStore(Type *DataType, Align Alignment)
override {
2355 return Impl.isLegalMaskedStore(DataType, Alignment);
2357 bool isLegalMaskedLoad(Type *DataType, Align Alignment)
override {
2358 return Impl.isLegalMaskedLoad(DataType, Alignment);
2360 bool isLegalNTStore(Type *DataType, Align Alignment)
override {
2361 return Impl.isLegalNTStore(DataType, Alignment);
2363 bool isLegalNTLoad(Type *DataType, Align Alignment)
override {
2364 return Impl.isLegalNTLoad(DataType, Alignment);
2366 bool isLegalBroadcastLoad(Type *ElementTy,
2367 ElementCount NumElements)
const override {
2368 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2370 bool isLegalMaskedScatter(Type *DataType, Align Alignment)
override {
2371 return Impl.isLegalMaskedScatter(DataType, Alignment);
2373 bool isLegalMaskedGather(Type *DataType, Align Alignment)
override {
2374 return Impl.isLegalMaskedGather(DataType, Alignment);
2376 bool forceScalarizeMaskedGather(
VectorType *DataType,
2377 Align Alignment)
override {
2378 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2380 bool forceScalarizeMaskedScatter(
VectorType *DataType,
2381 Align Alignment)
override {
2382 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2384 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment)
override {
2385 return Impl.isLegalMaskedCompressStore(DataType, Alignment);
2387 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment)
override {
2388 return Impl.isLegalMaskedExpandLoad(DataType, Alignment);
2390 bool isLegalStridedLoadStore(Type *DataType, Align Alignment)
override {
2391 return Impl.isLegalStridedLoadStore(DataType, Alignment);
2393 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType)
override {
2394 return Impl.isLegalMaskedVectorHistogram(AddrType, DataType);
2396 bool isLegalAltInstr(
VectorType *VecTy,
unsigned Opcode0,
unsigned Opcode1,
2397 const SmallBitVector &OpcodeMask)
const override {
2398 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2400 bool enableOrderedReductions()
override {
2401 return Impl.enableOrderedReductions();
2403 bool hasDivRemOp(Type *DataType,
bool IsSigned)
override {
2404 return Impl.hasDivRemOp(DataType, IsSigned);
2406 bool hasVolatileVariant(Instruction *
I,
unsigned AddrSpace)
override {
2407 return Impl.hasVolatileVariant(
I, AddrSpace);
2409 bool prefersVectorizedAddressing()
override {
2410 return Impl.prefersVectorizedAddressing();
2412 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2413 StackOffset BaseOffset,
bool HasBaseReg,
2415 unsigned AddrSpace)
override {
2416 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2419 bool LSRWithInstrQueries()
override {
return Impl.LSRWithInstrQueries(); }
2420 bool isTruncateFree(Type *Ty1, Type *Ty2)
override {
2421 return Impl.isTruncateFree(Ty1, Ty2);
2423 bool isProfitableToHoist(Instruction *
I)
override {
2424 return Impl.isProfitableToHoist(
I);
2426 bool useAA()
override {
return Impl.useAA(); }
2427 bool isTypeLegal(Type *Ty)
override {
return Impl.isTypeLegal(Ty); }
2428 unsigned getRegUsageForType(Type *Ty)
override {
2429 return Impl.getRegUsageForType(Ty);
2431 bool shouldBuildLookupTables()
override {
2432 return Impl.shouldBuildLookupTables();
2434 bool shouldBuildLookupTablesForConstant(Constant *
C)
override {
2435 return Impl.shouldBuildLookupTablesForConstant(
C);
2437 bool shouldBuildRelLookupTables()
override {
2438 return Impl.shouldBuildRelLookupTables();
2440 bool useColdCCForColdCall(Function &
F)
override {
2441 return Impl.useColdCCForColdCall(
F);
2444 InstructionCost getScalarizationOverhead(
VectorType *Ty,
2445 const APInt &DemandedElts,
2446 bool Insert,
bool Extract,
2448 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2452 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2453 ArrayRef<Type *> Tys,
2455 return Impl.getOperandsScalarizationOverhead(Args, Tys,
CostKind);
2458 bool supportsEfficientVectorElementLoadStore()
override {
2459 return Impl.supportsEfficientVectorElementLoadStore();
2462 bool supportsTailCalls()
override {
return Impl.supportsTailCalls(); }
2463 bool supportsTailCallFor(
const CallBase *CB)
override {
2464 return Impl.supportsTailCallFor(CB);
2467 bool enableAggressiveInterleaving(
bool LoopHasReductions)
override {
2468 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2470 MemCmpExpansionOptions enableMemCmpExpansion(
bool OptSize,
2471 bool IsZeroCmp)
const override {
2472 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2474 bool enableSelectOptimize()
override {
2475 return Impl.enableSelectOptimize();
2477 bool shouldTreatInstructionLikeSelect(
const Instruction *
I)
override {
2478 return Impl.shouldTreatInstructionLikeSelect(
I);
2480 bool enableInterleavedAccessVectorization()
override {
2481 return Impl.enableInterleavedAccessVectorization();
2483 bool enableMaskedInterleavedAccessVectorization()
override {
2484 return Impl.enableMaskedInterleavedAccessVectorization();
2486 bool isFPVectorizationPotentiallyUnsafe()
override {
2487 return Impl.isFPVectorizationPotentiallyUnsafe();
2489 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
unsigned BitWidth,
2491 unsigned *
Fast)
override {
2496 return Impl.getPopcntSupport(IntTyWidthInBit);
2498 bool haveFastSqrt(Type *Ty)
override {
return Impl.haveFastSqrt(Ty); }
2500 bool isExpensiveToSpeculativelyExecute(
const Instruction*
I)
override {
2501 return Impl.isExpensiveToSpeculativelyExecute(
I);
2504 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
override {
2505 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2508 InstructionCost getFPOpCost(Type *Ty)
override {
2509 return Impl.getFPOpCost(Ty);
2512 InstructionCost getIntImmCodeSizeCost(
unsigned Opc,
unsigned Idx,
2513 const APInt &Imm, Type *Ty)
override {
2514 return Impl.getIntImmCodeSizeCost(Opc,
Idx, Imm, Ty);
2516 InstructionCost getIntImmCost(
const APInt &Imm, Type *Ty,
2518 return Impl.getIntImmCost(Imm, Ty,
CostKind);
2520 InstructionCost getIntImmCostInst(
unsigned Opc,
unsigned Idx,
2521 const APInt &Imm, Type *Ty,
2523 Instruction *Inst =
nullptr)
override {
2524 return Impl.getIntImmCostInst(Opc,
Idx, Imm, Ty,
CostKind, Inst);
2527 const APInt &Imm, Type *Ty,
2529 return Impl.getIntImmCostIntrin(IID,
Idx, Imm, Ty,
CostKind);
2531 bool preferToKeepConstantsAttached(
const Instruction &Inst,
2532 const Function &Fn)
const override {
2533 return Impl.preferToKeepConstantsAttached(Inst, Fn);
2535 unsigned getNumberOfRegisters(
unsigned ClassID)
const override {
2536 return Impl.getNumberOfRegisters(ClassID);
2538 unsigned getRegisterClassForType(
bool Vector,
2539 Type *Ty =
nullptr)
const override {
2540 return Impl.getRegisterClassForType(
Vector, Ty);
2542 const char *getRegisterClassName(
unsigned ClassID)
const override {
2543 return Impl.getRegisterClassName(ClassID);
2545 TypeSize getRegisterBitWidth(
RegisterKind K)
const override {
2546 return Impl.getRegisterBitWidth(K);
2548 unsigned getMinVectorRegisterBitWidth()
const override {
2549 return Impl.getMinVectorRegisterBitWidth();
2551 std::optional<unsigned>
getMaxVScale()
const override {
2552 return Impl.getMaxVScale();
2554 std::optional<unsigned> getVScaleForTuning()
const override {
2555 return Impl.getVScaleForTuning();
2557 bool isVScaleKnownToBeAPowerOfTwo()
const override {
2558 return Impl.isVScaleKnownToBeAPowerOfTwo();
2560 bool shouldMaximizeVectorBandwidth(
2562 return Impl.shouldMaximizeVectorBandwidth(K);
2564 ElementCount getMinimumVF(
unsigned ElemWidth,
2565 bool IsScalable)
const override {
2566 return Impl.getMinimumVF(ElemWidth, IsScalable);
2568 unsigned getMaximumVF(
unsigned ElemWidth,
unsigned Opcode)
const override {
2569 return Impl.getMaximumVF(ElemWidth, Opcode);
2571 unsigned getStoreMinimumVF(
unsigned VF, Type *ScalarMemTy,
2572 Type *ScalarValTy)
const override {
2573 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2575 bool shouldConsiderAddressTypePromotion(
2576 const Instruction &
I,
bool &AllowPromotionWithoutCommonHeader)
override {
2577 return Impl.shouldConsiderAddressTypePromotion(
2578 I, AllowPromotionWithoutCommonHeader);
2580 unsigned getCacheLineSize()
const override {
return Impl.getCacheLineSize(); }
2581 std::optional<unsigned> getCacheSize(
CacheLevel Level)
const override {
2582 return Impl.getCacheSize(Level);
2584 std::optional<unsigned>
2585 getCacheAssociativity(
CacheLevel Level)
const override {
2586 return Impl.getCacheAssociativity(Level);
2589 std::optional<unsigned> getMinPageSize()
const override {
2590 return Impl.getMinPageSize();
2595 unsigned getPrefetchDistance()
const override {
2596 return Impl.getPrefetchDistance();
2602 unsigned getMinPrefetchStride(
unsigned NumMemAccesses,
2603 unsigned NumStridedMemAccesses,
2604 unsigned NumPrefetches,
2605 bool HasCall)
const override {
2606 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2607 NumPrefetches, HasCall);
2613 unsigned getMaxPrefetchIterationsAhead()
const override {
2614 return Impl.getMaxPrefetchIterationsAhead();
2618 bool enableWritePrefetching()
const override {
2619 return Impl.enableWritePrefetching();
2623 bool shouldPrefetchAddressSpace(
unsigned AS)
const override {
2624 return Impl.shouldPrefetchAddressSpace(AS);
2627 unsigned getMaxInterleaveFactor(ElementCount VF)
override {
2628 return Impl.getMaxInterleaveFactor(VF);
2630 unsigned getEstimatedNumberOfCaseClusters(
const SwitchInst &SI,
2632 ProfileSummaryInfo *PSI,
2633 BlockFrequencyInfo *BFI)
override {
2634 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2636 InstructionCost getArithmeticInstrCost(
2638 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2639 ArrayRef<const Value *> Args,
2640 const Instruction *CxtI =
nullptr)
override {
2641 return Impl.getArithmeticInstrCost(Opcode, Ty,
CostKind, Opd1Info, Opd2Info,
2644 InstructionCost getAltInstrCost(
VectorType *VecTy,
unsigned Opcode0,
2646 const SmallBitVector &OpcodeMask,
2648 return Impl.getAltInstrCost(VecTy, Opcode0, Opcode1, OpcodeMask,
CostKind);
2655 ArrayRef<const Value *> Args,
2656 const Instruction *CxtI)
override {
2657 return Impl.getShuffleCost(Kind, Tp, Mask,
CostKind,
Index, SubTp, Args,
2660 InstructionCost getCastInstrCost(
unsigned Opcode, Type *Dst, Type *Src,
2663 const Instruction *
I)
override {
2664 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH,
CostKind,
I);
2666 InstructionCost getExtractWithExtendCost(
unsigned Opcode, Type *Dst,
2668 unsigned Index)
override {
2669 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy,
Index);
2672 const Instruction *
I =
nullptr)
override {
2673 return Impl.getCFInstrCost(Opcode,
CostKind,
I);
2675 InstructionCost getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy,
2678 const Instruction *
I)
override {
2679 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred,
CostKind,
I);
2681 InstructionCost getVectorInstrCost(
unsigned Opcode, Type *Val,
2683 unsigned Index, Value *Op0,
2684 Value *Op1)
override {
2685 return Impl.getVectorInstrCost(Opcode, Val,
CostKind,
Index, Op0, Op1);
2687 InstructionCost getVectorInstrCost(
const Instruction &
I, Type *Val,
2689 unsigned Index)
override {
2693 getReplicationShuffleCost(Type *EltTy,
int ReplicationFactor,
int VF,
2694 const APInt &DemandedDstElts,
2696 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2699 InstructionCost getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment,
2702 OperandValueInfo OpInfo,
2703 const Instruction *
I)
override {
2707 InstructionCost getVPMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment,
2710 const Instruction *
I)
override {
2711 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment,
AddressSpace,
2714 InstructionCost getMaskedMemoryOpCost(
unsigned Opcode, Type *Src,
2717 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment,
AddressSpace,
2721 getGatherScatterOpCost(
unsigned Opcode, Type *DataTy,
const Value *
Ptr,
2722 bool VariableMask, Align Alignment,
2724 const Instruction *
I =
nullptr)
override {
2725 return Impl.getGatherScatterOpCost(Opcode, DataTy,
Ptr, VariableMask,
2729 getStridedMemoryOpCost(
unsigned Opcode, Type *DataTy,
const Value *
Ptr,
2730 bool VariableMask, Align Alignment,
2732 const Instruction *
I =
nullptr)
override {
2733 return Impl.getStridedMemoryOpCost(Opcode, DataTy,
Ptr, VariableMask,
2736 InstructionCost getInterleavedMemoryOpCost(
2737 unsigned Opcode, Type *VecTy,
unsigned Factor, ArrayRef<unsigned> Indices,
2739 bool UseMaskForCond,
bool UseMaskForGaps)
override {
2740 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2742 UseMaskForCond, UseMaskForGaps);
2745 getArithmeticReductionCost(
unsigned Opcode,
VectorType *Ty,
2746 std::optional<FastMathFlags> FMF,
2748 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF,
CostKind);
2753 return Impl.getMinMaxReductionCost(IID, Ty, FMF,
CostKind);
2756 getExtendedReductionCost(
unsigned Opcode,
bool IsUnsigned, Type *ResTy,
2759 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2763 getMulAccReductionCost(
bool IsUnsigned, Type *ResTy,
VectorType *Ty,
2765 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty,
CostKind);
2767 InstructionCost getIntrinsicInstrCost(
const IntrinsicCostAttributes &ICA,
2769 return Impl.getIntrinsicInstrCost(ICA,
CostKind);
2771 InstructionCost getCallInstrCost(Function *
F, Type *
RetTy,
2772 ArrayRef<Type *> Tys,
2776 unsigned getNumberOfParts(Type *Tp)
override {
2777 return Impl.getNumberOfParts(Tp);
2779 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2780 const SCEV *
Ptr)
override {
2781 return Impl.getAddressComputationCost(Ty, SE,
Ptr);
2783 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys)
override {
2784 return Impl.getCostOfKeepingLiveOverCall(Tys);
2786 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2787 MemIntrinsicInfo &
Info)
override {
2788 return Impl.getTgtMemIntrinsic(Inst,
Info);
2790 unsigned getAtomicMemIntrinsicMaxElementSize()
const override {
2791 return Impl.getAtomicMemIntrinsicMaxElementSize();
2793 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2794 Type *ExpectedType)
override {
2795 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2797 Type *getMemcpyLoopLoweringType(
2798 LLVMContext &Context, Value *
Length,
unsigned SrcAddrSpace,
2799 unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign,
2800 std::optional<uint32_t> AtomicElementSize)
const override {
2801 return Impl.getMemcpyLoopLoweringType(Context,
Length, SrcAddrSpace,
2802 DestAddrSpace, SrcAlign, DestAlign,
2805 void getMemcpyLoopResidualLoweringType(
2806 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2807 unsigned RemainingBytes,
unsigned SrcAddrSpace,
unsigned DestAddrSpace,
2808 unsigned SrcAlign,
unsigned DestAlign,
2809 std::optional<uint32_t> AtomicCpySize)
const override {
2810 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2811 SrcAddrSpace, DestAddrSpace,
2812 SrcAlign, DestAlign, AtomicCpySize);
2815 const Function *Callee)
const override {
2816 return Impl.areInlineCompatible(Caller, Callee);
2818 unsigned getInlineCallPenalty(
const Function *
F,
const CallBase &Call,
2819 unsigned DefaultCallPenalty)
const override {
2820 return Impl.getInlineCallPenalty(
F, Call, DefaultCallPenalty);
2822 bool areTypesABICompatible(
const Function *Caller,
const Function *Callee,
2823 const ArrayRef<Type *> &Types)
const override {
2824 return Impl.areTypesABICompatible(Caller, Callee, Types);
2827 return Impl.isIndexedLoadLegal(
Mode, Ty, getDataLayout());
2830 return Impl.isIndexedStoreLegal(
Mode, Ty, getDataLayout());
2832 unsigned getLoadStoreVecRegBitWidth(
unsigned AddrSpace)
const override {
2833 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2835 bool isLegalToVectorizeLoad(LoadInst *LI)
const override {
2836 return Impl.isLegalToVectorizeLoad(LI);
2838 bool isLegalToVectorizeStore(StoreInst *SI)
const override {
2839 return Impl.isLegalToVectorizeStore(SI);
2841 bool isLegalToVectorizeLoadChain(
unsigned ChainSizeInBytes, Align Alignment,
2842 unsigned AddrSpace)
const override {
2843 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2846 bool isLegalToVectorizeStoreChain(
unsigned ChainSizeInBytes, Align Alignment,
2847 unsigned AddrSpace)
const override {
2848 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2851 bool isLegalToVectorizeReduction(
const RecurrenceDescriptor &RdxDesc,
2852 ElementCount VF)
const override {
2853 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2855 bool isElementTypeLegalForScalableVector(Type *Ty)
const override {
2856 return Impl.isElementTypeLegalForScalableVector(Ty);
2858 unsigned getLoadVectorFactor(
unsigned VF,
unsigned LoadSize,
2859 unsigned ChainSizeInBytes,
2861 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2863 unsigned getStoreVectorFactor(
unsigned VF,
unsigned StoreSize,
2864 unsigned ChainSizeInBytes,
2866 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2868 bool preferInLoopReduction(
unsigned Opcode, Type *Ty,
2869 ReductionFlags Flags)
const override {
2870 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2872 bool preferPredicatedReductionSelect(
unsigned Opcode, Type *Ty,
2873 ReductionFlags Flags)
const override {
2874 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2876 bool preferEpilogueVectorization()
const override {
2877 return Impl.preferEpilogueVectorization();
2880 bool shouldExpandReduction(
const IntrinsicInst *II)
const override {
2881 return Impl.shouldExpandReduction(II);
2884 unsigned getGISelRematGlobalCost()
const override {
2885 return Impl.getGISelRematGlobalCost();
2888 unsigned getMinTripCountTailFoldingThreshold()
const override {
2889 return Impl.getMinTripCountTailFoldingThreshold();
2892 bool supportsScalableVectors()
const override {
2893 return Impl.supportsScalableVectors();
2896 bool enableScalableVectorization()
const override {
2897 return Impl.enableScalableVectorization();
2900 bool hasActiveVectorLength(
unsigned Opcode, Type *DataType,
2901 Align Alignment)
const override {
2902 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2906 getVPLegalizationStrategy(
const VPIntrinsic &PI)
const override {
2907 return Impl.getVPLegalizationStrategy(PI);
2910 bool hasArmWideBranch(
bool Thumb)
const override {
2911 return Impl.hasArmWideBranch(Thumb);
2914 unsigned getMaxNumArgs()
const override {
2915 return Impl.getMaxNumArgs();
2919template <
typename T>
2921 : TTIImpl(new Model<
T>(Impl)) {}
2952 : TTICallback(Arg.TTICallback) {}
2954 : TTICallback(
std::
move(Arg.TTICallback)) {}
2956 TTICallback =
RHS.TTICallback;
2960 TTICallback = std::move(
RHS.TTICallback);
2992 std::optional<TargetTransformInfo>
TTI;
2994 virtual void anchor();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMDGPU Lower Kernel Arguments
Atomic ordering constants.
Analysis containing CSE Info
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
This file defines an InstructionCost class that is used when calculating the cost of an instruction,...
std::optional< unsigned > getMaxVScale(const Function &F, const TargetTransformInfo &TTI)
mir Rename Register Operands
This header defines various interfaces for pass management in LLVM.
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
This file implements the SmallBitVector class.
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
API to communicate dependencies between analyses during invalidation.
A container for analyses that lazily runs them and caches their results.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This is an important base class in LLVM.
A parsed version of the target data layout string in and methods for querying it.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Convenience struct for specifying and reasoning about fast-math flags.
ImmutablePass class - This class is used to provide information that does not need to be run.
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
Class to represent integer types.
Drive the analysis of interleaved memory accesses in the loop.
FastMathFlags getFlags() const
const SmallVectorImpl< Type * > & getArgTypes() const
Type * getReturnType() const
bool skipScalarizationCost() const
const SmallVectorImpl< const Value * > & getArgs() const
InstructionCost getScalarizationCost() const
const IntrinsicInst * getInst() const
Intrinsic::ID getID() const
bool isTypeBasedOnly() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Represents a single loop in the control flow graph.
A set of analyses that are preserved following a run of a transformation pass.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
An instruction for storing to memory.
Analysis pass providing the TargetTransformInfo.
TargetIRAnalysis(const TargetIRAnalysis &Arg)
TargetIRAnalysis & operator=(const TargetIRAnalysis &RHS)
Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
TargetIRAnalysis()
Default construct a target IR analysis.
TargetIRAnalysis & operator=(TargetIRAnalysis &&RHS)
TargetIRAnalysis(TargetIRAnalysis &&Arg)
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Base class of all SIMD vector types.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool areInlineCompatible(const Function &Caller, const Function &Callee)
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
Type
MessagePack types as defined in the standard, with the exception of Integer being divided into a sign...
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
AtomicOrdering
Atomic ordering for LLVM's memory model.
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
constexpr unsigned BitWidth
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
@ DataAndControlFlowWithoutRuntimeCheck
Use predicate to control both data and control flow, but modify the trip count so that a runtime over...
@ DataWithEVL
Use predicated EVL instructions for tail-folding.
@ DataAndControlFlow
Use predicate to control both data and control flow.
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
Implement std::hash so that hash_code can be used in STL containers.
This struct is a compact representation of a valid (non-zero power of two) alignment.
A CRTP mix-in that provides informational APIs needed for analysis passes.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Attributes of a target dependent hardware loop.
bool canAnalyze(LoopInfo &LI)
HardwareLoopInfo()=delete
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
Information about a load/store intrinsic defined by the target.
unsigned short MatchingId
Value * PtrVal
This is the pointer that the intrinsic is loading from or storing to.
InterleavedAccessInfo * IAI
TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
LoopVectorizationLegality * LVL