#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
class BlockFrequencyInfo;
class OptimizationRemarkEmitter;
class InterleavedAccessInfo;
class LoopVectorizationLegality;
class ProfileSummaryInfo;
class RecurrenceDescriptor;
class TargetLibraryInfo;
  Type *RetTy = nullptr;
      bool TypeBasedOnly = false);
class TargetTransformInfo;
  static_assert(sizeof(PointersChainInfo) == 4,
                "Was size increase justified?");
      const PointersChainInfo &Info, Type *AccessTy,
  std::pair<const Value *, unsigned>
      KnownBits &Known, bool &KnownBitsComputed) const;
      SimplifyAndSetOp) const;
      bool HasBaseReg, int64_t Scale, unsigned AddrSpace = 0,
      int64_t BaseOffset, bool HasBaseReg, unsigned AddrSpace = 0) const;
      const APInt &DemandedElts, bool Insert, bool Extract,
      bool IsZeroCmp) const;
      unsigned *Fast = nullptr) const;
      Type *ScalarValTy) const;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
      unsigned NumStridedMemAccesses, unsigned NumPrefetches,
      bool HasCall) const;
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) const;
      ArrayRef<int> Mask = std::nullopt,
      ArrayRef<const Value *> Args = std::nullopt) const;
      unsigned Index) const;
      Value *Op1 = nullptr) const;
      unsigned Index = -1) const;
      const APInt &DemandedDstElts,
      const Instruction *I = nullptr) const;
      bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
    return FMF && !(*FMF).allowReassoc();
      const SCEV *Ptr = nullptr) const;
      Type *ExpectedType) const;
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
      unsigned DefaultCallPenalty) const;
      unsigned AddrSpace) const;
      unsigned AddrSpace) const;
      unsigned ChainSizeInBytes,
      unsigned ChainSizeInBytes,
      Align Alignment) const;
  template <typename T> class Model;

  std::unique_ptr<Concept> TTIImpl;
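  // TargetTransformInfo hides the concrete target implementation behind the
  // classic type-erasure ("Concept"/"Model") pattern: the abstract Concept
  // declares every hook as a pure virtual function, and Model<T> forwards
  // each call to a per-target implementation object held by value. A minimal
  // sketch of the same pattern, with hypothetical names rather than the
  // actual LLVM classes:
  //
  //   struct Concept {
  //     virtual ~Concept() = default;
  //     virtual unsigned getCacheLineSize() const = 0;
  //   };
  //   template <typename T> struct Model final : Concept {
  //     T Impl;
  //     Model(T Impl) : Impl(std::move(Impl)) {}
  //     unsigned getCacheLineSize() const override {
  //       return Impl.getCacheLineSize();
  //     }
  //   };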
  virtual std::pair<const Value *, unsigned>
      Value *NewV) const = 0;
      KnownBits &Known, bool &KnownBitsComputed) = 0;
      SimplifyAndSetOp) = 0;
      int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
      Align Alignment) = 0;
      Align Alignment) = 0;
      bool HasBaseReg, int64_t Scale, unsigned AddrSpace) = 0;
      const APInt &DemandedElts, bool Insert, bool Extract,
      unsigned *Fast) = 0;
      Type *Ty = nullptr) const = 0;
      bool IsScalable) const = 0;
      Type *ScalarValTy) const = 0;
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
      unsigned NumStridedMemAccesses, unsigned NumPrefetches,
      bool HasCall) const = 0;
      unsigned Index) = 0;
      unsigned Index) = 0;
      const APInt &DemandedDstElts,
      bool VariableMask, Align Alignment,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
      std::optional<FastMathFlags> FMF,
      Type *ExpectedType) = 0;
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const = 0;
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const = 0;
      unsigned DefaultCallPenalty) const = 0;
      unsigned AddrSpace) const = 0;
      unsigned AddrSpace) const = 0;
      unsigned ChainSizeInBytes,
      unsigned ChainSizeInBytes,
      Align Alignment) const = 0;
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override = default;
  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }
  InstructionCost
  getGEPCost(Type *PointeeType, const Value *Ptr,
             ArrayRef<const Value *> Operands, Type *AccessType,
             TargetCostKind CostKind) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands, AccessType, CostKind);
  }
  InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
                                       const Value *Base,
                                       const PointersChainInfo &Info,
                                       Type *AccessTy,
                                       TargetCostKind CostKind) override {
    return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
  }
  unsigned getInliningThresholdMultiplier() const override {
    return Impl.getInliningThresholdMultiplier();
  }
  unsigned adjustInliningThreshold(const CallBase *CB) override {
    return Impl.adjustInliningThreshold(CB);
  }
  unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisSavingsMultiplier();
  }
  unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const override {
    return Impl.getInliningCostBenefitAnalysisProfitableMultiplier();
  }
  int getInlinerVectorBonusPercent() const override {
    return Impl.getInlinerVectorBonusPercent();
  }
  unsigned getCallerAllocaCost(const CallBase *CB,
                               const AllocaInst *AI) const override {
    return Impl.getCallerAllocaCost(CB, AI);
  }
  InstructionCost getMemcpyCost(const Instruction *I) override {
    return Impl.getMemcpyCost(I);
  }
  uint64_t getMaxMemIntrinsicInlineSizeThreshold() const override {
    return Impl.getMaxMemIntrinsicInlineSizeThreshold();
  }
  InstructionCost getInstructionCost(const User *U,
                                     ArrayRef<const Value *> Operands,
                                     TargetCostKind CostKind) override {
    return Impl.getInstructionCost(U, Operands, CostKind);
  }
  BranchProbability getPredictableBranchThreshold() override {
    return Impl.getPredictableBranchThreshold();
  }
  bool hasBranchDivergence(const Function *F = nullptr) override {
    return Impl.hasBranchDivergence(F);
  }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }
  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }
  bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isValidAddrSpaceCast(FromAS, ToAS);
  }
  bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const override {
    return Impl.addrspacesMayAlias(AS0, AS1);
  }
  unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const override {
    return Impl.collectFlatAddressOperands(OpIndexes, IID);
  }
  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
    return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
  }
  bool
  canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
    return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
  }
  unsigned getAssumedAddrSpace(const Value *V) const override {
    return Impl.getAssumedAddrSpace(V);
  }
  bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const override {
    return Impl.getPredicatedAddrSpace(V);
  }
  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const override {
    return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
  }
  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) override {
    return Impl.getUnrollingPreferences(L, SE, UP, ORE);
  }
  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             PeelingPreferences &PP) override {
    return Impl.getPeelingPreferences(L, SE, PP);
  }
  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC, TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) override {
    return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }
  bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
    return Impl.preferPredicateOverEpilogue(TFI);
  }
  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
    return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }
  std::optional<Instruction *>
  instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
    return Impl.instCombineIntrinsic(IC, II);
  }
  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) override {
    return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                 KnownBitsComputed);
  }
  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) override {
    return Impl.simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                      AddrSpace, I);
  }
  bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                     const TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool isNumRegsMajorCostOfLSR() override {
    return Impl.isNumRegsMajorCostOfLSR();
  }
  bool shouldFoldTerminatingConditionAfterLSR() const override {
    return Impl.shouldFoldTerminatingConditionAfterLSR();
  }
  bool isProfitableLSRChainElement(Instruction *I) override {
    return Impl.isProfitableLSRChainElement(I);
  }
  bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) override {
    return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
  }
  AddressingModeKind
  getPreferredAddressingMode(const Loop *L,
                             ScalarEvolution *SE) const override {
    return Impl.getPreferredAddressingMode(L, SE);
  }
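  // Passes such as LoopStrengthReduce and CodeGenPrepare use the addressing
  // hooks above to ask whether a candidate address expression can be folded
  // into a memory access. An illustrative query, assuming Int32Ty and a
  // TargetTransformInfo reference TTI are in scope:
  //
  //   // Is "base register + 4 * scaled register + 16" legal for an i32
  //   // access in address space 0?
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/4,
  //                                          /*AddrSpace=*/0);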
  bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedStore(DataType, Alignment);
  }
  bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedLoad(DataType, Alignment);
  }
  bool isLegalNTStore(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTStore(DataType, Alignment);
  }
  bool isLegalNTLoad(Type *DataType, Align Alignment) override {
    return Impl.isLegalNTLoad(DataType, Alignment);
  }
  bool isLegalBroadcastLoad(Type *ElementTy,
                            ElementCount NumElements) const override {
    return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
  }
  bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
    return Impl.isLegalMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedGather(VectorType *DataType,
                                  Align Alignment) override {
    return Impl.forceScalarizeMaskedGather(DataType, Alignment);
  }
  bool forceScalarizeMaskedScatter(VectorType *DataType,
                                   Align Alignment) override {
    return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
  }
  bool isLegalMaskedCompressStore(Type *DataType) override {
    return Impl.isLegalMaskedCompressStore(DataType);
  }
  bool isLegalMaskedExpandLoad(Type *DataType) override {
    return Impl.isLegalMaskedExpandLoad(DataType);
  }
  bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
                       const SmallBitVector &OpcodeMask) const override {
    return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
  }
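  // The loop and SLP vectorizers consult these predicates before emitting
  // masked or gather/scatter memory intrinsics. A hedged sketch, assuming an
  // LLVMContext Ctx and a TargetTransformInfo reference TTI are in scope:
  //
  //   auto *VecTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  //   bool CanMaskLoad = TTI.isLegalMaskedLoad(VecTy, Align(16));
  //   bool CanGather = TTI.isLegalMaskedGather(VecTy, Align(4)) &&
  //                    !TTI.forceScalarizeMaskedGather(VecTy, Align(4));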
  bool enableOrderedReductions() override {
    return Impl.enableOrderedReductions();
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale,
                                       unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
                                     AddrSpace);
  }
  bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getRegUsageForType(Type *Ty) override {
    return Impl.getRegUsageForType(Ty);
  }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool shouldBuildRelLookupTables() override {
    return Impl.shouldBuildRelLookupTables();
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }
  InstructionCost getScalarizationOverhead(VectorType *Ty,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TargetCostKind CostKind) override {
    return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                         CostKind);
  }
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
  }
  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }
  bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
  bool supportsTailCallFor(const CallBase *CB) override {
    return Impl.supportsTailCallFor(CB);
  }
  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                               bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool enableSelectOptimize() override {
    return Impl.enableSelectOptimize();
  }
  bool enableMaskedInterleavedAccessVectorization() override {
    return Impl.enableMaskedInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
  bool isExpensiveToSpeculativelyExecute(const Instruction *I) override {
    return Impl.isExpensiveToSpeculativelyExecute(I);
  }
  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }
  InstructionCost getFPOpCost(Type *Ty) override {
    return Impl.getFPOpCost(Ty);
  }
  InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
                                        const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
                                TargetCostKind CostKind) override {
    return Impl.getIntImmCost(Imm, Ty, CostKind);
  }
  InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
                                    const APInt &Imm, Type *Ty,
                                    TargetCostKind CostKind,
                                    Instruction *Inst = nullptr) override {
    return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
  }
  InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                      const APInt &Imm, Type *Ty,
                                      TargetCostKind CostKind) override {
    return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
  }
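  // Passes like ConstantHoisting use the immediate-cost hooks to decide
  // whether a large constant should be hoisted and rematerialized. An
  // illustrative query, assuming Int64Ty and TTI are in scope:
  //
  //   InstructionCost C = TTI.getIntImmCostInst(
  //       Instruction::Add, /*Idx=*/1, APInt(64, 0x12345678), Int64Ty,
  //       TargetTransformInfo::TCK_SizeAndLatency);
  //   bool CheapEverywhere = C <= TargetTransformInfo::TCC_Basic;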
  unsigned getNumberOfRegisters(unsigned ClassID) const override {
    return Impl.getNumberOfRegisters(ClassID);
  }
  unsigned getRegisterClassForType(bool Vector,
                                   Type *Ty = nullptr) const override {
    return Impl.getRegisterClassForType(Vector, Ty);
  }
  const char *getRegisterClassName(unsigned ClassID) const override {
    return Impl.getRegisterClassName(ClassID);
  }
  TypeSize getRegisterBitWidth(RegisterKind K) const override {
    return Impl.getRegisterBitWidth(K);
  }
  unsigned getMinVectorRegisterBitWidth() const override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  std::optional<unsigned> getMaxVScale() const override {
    return Impl.getMaxVScale();
  }
  std::optional<unsigned> getVScaleForTuning() const override {
    return Impl.getVScaleForTuning();
  }
  bool isVScaleKnownToBeAPowerOfTwo() const override {
    return Impl.isVScaleKnownToBeAPowerOfTwo();
  }
  bool shouldMaximizeVectorBandwidth(RegisterKind K) const override {
    return Impl.shouldMaximizeVectorBandwidth(K);
  }
  ElementCount getMinimumVF(unsigned ElemWidth,
                            bool IsScalable) const override {
    return Impl.getMinimumVF(ElemWidth, IsScalable);
  }
  unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
    return Impl.getMaximumVF(ElemWidth, Opcode);
  }
  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const override {
    return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
  std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
    return Impl.getCacheSize(Level);
  }
  std::optional<unsigned>
  getCacheAssociativity(CacheLevel Level) const override {
    return Impl.getCacheAssociativity(Level);
  }
  unsigned getPrefetchDistance() const override {
    return Impl.getPrefetchDistance();
  }
  unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                unsigned NumStridedMemAccesses,
                                unsigned NumPrefetches,
                                bool HasCall) const override {
    return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                     NumPrefetches, HasCall);
  }
  unsigned getMaxPrefetchIterationsAhead() const override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  bool enableWritePrefetching() const override {
    return Impl.enableWritePrefetching();
  }
  bool shouldPrefetchAddressSpace(unsigned AS) const override {
    return Impl.shouldPrefetchAddressSpace(AS);
  }
  unsigned getMaxInterleaveFactor(ElementCount VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
  }
  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TargetCostKind CostKind,
      OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
      ArrayRef<const Value *> Args,
      const Instruction *CxtI = nullptr) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
                                       Args, CxtI);
  }
  InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, TargetCostKind CostKind,
                                 int Index, VectorType *SubTp,
                                 ArrayRef<const Value *> Args) override {
    return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
  }
  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   CastContextHint CCH,
                                   TargetCostKind CostKind,
                                   const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }
  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy,
                                           unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  InstructionCost getCFInstrCost(unsigned Opcode, TargetCostKind CostKind,
                                 const Instruction *I = nullptr) override {
    return Impl.getCFInstrCost(Opcode, CostKind, I);
  }
  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TargetCostKind CostKind,
                                     const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
  }
  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TargetCostKind CostKind,
                                     unsigned Index, Value *Op0,
                                     Value *Op1) override {
    return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
  }
  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TargetCostKind CostKind,
                                     unsigned Index) override {
    return Impl.getVectorInstrCost(I, Val, CostKind, Index);
  }
  InstructionCost
  getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
                            const APInt &DemandedDstElts,
                            TargetCostKind CostKind) override {
    return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
                                          DemandedDstElts, CostKind);
  }
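  // Vectorization cost models are driven by these per-opcode hooks. An
  // illustrative throughput query, assuming VecTy is a VectorType *, TTI is
  // in scope, and ScalarCost is a previously computed scalar baseline:
  //
  //   InstructionCost VecAdd = TTI.getArithmeticInstrCost(
  //       Instruction::FAdd, VecTy, TargetTransformInfo::TCK_RecipThroughput);
  //   InstructionCost Splat =
  //       TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);
  //   bool Profitable = (VecAdd + Splat) <= ScalarCost;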
  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                  unsigned AddressSpace,
                                  TargetCostKind CostKind,
                                  OperandValueInfo OpInfo,
                                  const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
                                OpInfo, I);
  }
  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TargetCostKind CostKind,
                                    const Instruction *I) override {
    return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                  CostKind, I);
  }
  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                        Align Alignment, unsigned AddressSpace,
                                        TargetCostKind CostKind) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                      CostKind);
  }
  InstructionCost
  getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
                         bool VariableMask, Align Alignment,
                         TargetCostKind CostKind,
                         const Instruction *I = nullptr) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment, CostKind, I);
  }
  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TargetCostKind CostKind,
      bool UseMaskForCond, bool UseMaskForGaps) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
  }
  InstructionCost
  getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                             std::optional<FastMathFlags> FMF,
                             TargetCostKind CostKind) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
  }
  InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
                                         FastMathFlags FMF,
                                         TargetCostKind CostKind) override {
    return Impl.getMinMaxReductionCost(IID, Ty, FMF, CostKind);
  }
  InstructionCost
  getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
                           VectorType *Ty, FastMathFlags FMF,
                           TargetCostKind CostKind) override {
    return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
                                         CostKind);
  }
  InstructionCost
  getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
                         TargetCostKind CostKind) override {
    return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
  }
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TargetCostKind CostKind) override {
    return Impl.getIntrinsicInstrCost(ICA, CostKind);
  }
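  // Intrinsic calls are costed through an IntrinsicCostAttributes bundle
  // rather than a long parameter list. A hedged sketch, assuming FloatTy and
  // TTI are in scope:
  //
  //   IntrinsicCostAttributes ICA(Intrinsic::fmuladd, FloatTy,
  //                               {FloatTy, FloatTy, FloatTy});
  //   InstructionCost C = TTI.getIntrinsicInstrCost(
  //       ICA, TargetTransformInfo::TCK_RecipThroughput);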
  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
                                   ArrayRef<Type *> Tys,
                                   TargetCostKind CostKind) override {
    return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                            const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(
      LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
      unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicElementSize) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
                                          DestAddrSpace, SrcAlign, DestAlign,
                                          AtomicElementSize);
  }
  void getMemcpyLoopResidualLoweringType(
      SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
      unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
      unsigned SrcAlign, unsigned DestAlign,
      std::optional<uint32_t> AtomicCpySize) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAddrSpace, DestAddrSpace,
                                           SrcAlign, DestAlign, AtomicCpySize);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
                                unsigned DefaultCallPenalty) const override {
    return Impl.getInlineCallPenalty(F, Call, DefaultCallPenalty);
  }
  bool areTypesABICompatible(const Function *Caller, const Function *Callee,
                             const ArrayRef<Type *> &Types) const override {
    return Impl.areTypesABICompatible(Caller, Callee, Types);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
                                   ElementCount VF) const override {
    return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
  }
  bool isElementTypeLegalForScalableVector(Type *Ty) const override {
    return Impl.isElementTypeLegalForScalableVector(Ty);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool preferInLoopReduction(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.preferInLoopReduction(Opcode, Ty, Flags);
  }
  bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
                                       ReductionFlags Flags) const override {
    return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
  }
  bool preferEpilogueVectorization() const override {
    return Impl.preferEpilogueVectorization();
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  unsigned getGISelRematGlobalCost() const override {
    return Impl.getGISelRematGlobalCost();
  }
  unsigned getMinTripCountTailFoldingThreshold() const override {
    return Impl.getMinTripCountTailFoldingThreshold();
  }
  bool supportsScalableVectors() const override {
    return Impl.supportsScalableVectors();
  }
  bool enableScalableVectorization() const override {
    return Impl.enableScalableVectorization();
  }
  bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
                             Align Alignment) const override {
    return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
  }
  VPLegalization
  getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
    return Impl.getVPLegalizationStrategy(PI);
  }
  bool hasArmWideBranch(bool Thumb) const override {
    return Impl.hasArmWideBranch(Thumb);
  }
  unsigned getMaxNumArgs() const override {
    return Impl.getMaxNumArgs();
  }
};
template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}
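// A target's TargetMachine typically builds a TargetTransformInfo by passing
// its TTI implementation object to this constructor, which wraps it in
// Model<T>. An illustrative sketch (XYZTargetMachine and XYZTTIImpl are
// hypothetical placeholders for a real target):
//
//   TargetTransformInfo
//   XYZTargetMachine::getTargetTransformInfo(const Function &F) const {
//     return TargetTransformInfo(XYZTTIImpl(this, F));
//   }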
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  std::optional<TargetTransformInfo> TTI;

  virtual void anchor();
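// Clients normally do not construct a TargetTransformInfo directly; they
// request it from the pass manager through TargetIRAnalysis. A minimal sketch
// for a new-pass-manager function pass (ExamplePass is a placeholder name):
//
//   PreservedAnalyses ExamplePass::run(Function &F,
//                                      FunctionAnalysisManager &AM) {
//     TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
//     if (TTI.getCacheLineSize() >= 64) {
//       // ... tune the transformation for wide cache lines ...
//     }
//     return PreservedAnalyses::all();
//   }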