//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <optional>
#include <utility>

namespace llvm {

class Function;
class GlobalValue;
class LLVMContext;
class ScalarEvolution;
class SCEV;
class TargetMachine;

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
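///
/// A minimal sketch of a conforming subclass (hypothetical target names, for
/// illustration only; real targets also override the cost hooks they care
/// about):
///
///   class MyTTIImpl : public BasicTTIImplBase<MyTTIImpl> {
///     const MySubtarget *ST;        // hypothetical subtarget class
///     const MyTargetLowering *TLI;  // hypothetical lowering class
///
///   public:
///     const MySubtarget *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };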
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
  using TTI = TargetTransformInfo;

  /// Helper function to access this as a T.
  T *thisT() { return static_cast<T *>(this); }

  /// Estimate a cost of Broadcast as an extract and sequence of insert
  /// operations.
  InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy,
                                              TTI::TargetCostKind CostKind) {
    InstructionCost Cost = 0;
    // Broadcast cost is equal to the cost of extracting the zero'th element
    // plus the cost of inserting it into every element of the result vector.
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                        CostKind, 0, nullptr, nullptr);

    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                          CostKind, i, nullptr, nullptr);
    }
    return Cost;
  }
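
  // Worked example (illustrative): broadcasting element 0 of a <4 x float>
  // is costed as one extract of element 0 plus four inserts, i.e.
  //   Cost = C(extract, 0) + C(insert, 0) + ... + C(insert, 3).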

  /// Estimate a cost of shuffle as a sequence of extract and insert
  /// operations.
  InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy,
                                            TTI::TargetCostKind CostKind) {
    InstructionCost Cost = 0;
    // Shuffle cost is equal to the cost of extracting elements from the
    // arguments plus the cost of inserting them into the result vector.

    // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
    // index 0 of the first vector, index 1 of the second vector, index 2 of
    // the first vector, and finally index 3 of the second vector, and insert
    // them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                          CostKind, i, nullptr, nullptr);
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                          CostKind, i, nullptr, nullptr);
    }
    return Cost;
  }

  /// Estimate a cost of subvector extraction as a sequence of extract and
  /// insert operations.
  InstructionCost getExtractSubvectorOverhead(VectorType *VTy,
                                              TTI::TargetCostKind CostKind,
                                              int Index,
                                              FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only extract subvectors from vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_ExtractSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector extraction cost is equal to the cost of extracting elements
    // from the source type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                      CostKind, i + Index, nullptr, nullptr);
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy,
                                          CostKind, i, nullptr, nullptr);
    }
    return Cost;
  }
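
  // Worked example (illustrative): extracting a <2 x float> subvector at
  // Index 2 from a <4 x float> costs extracts of source elements 2 and 3
  // plus inserts into result elements 0 and 1.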

  /// Estimate a cost of subvector insertion as a sequence of extract and
  /// insert operations.
  InstructionCost getInsertSubvectorOverhead(VectorType *VTy,
                                             TTI::TargetCostKind CostKind,
                                             int Index,
                                             FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only insert subvectors into vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_InsertSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector insertion cost is equal to the cost of extracting elements
    // from the source type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy,
                                          CostKind, i, nullptr, nullptr);
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, CostKind,
                                      i + Index, nullptr, nullptr);
    }
    return Cost;
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }

  InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                              Align Alignment,
                                              bool VariableMask,
                                              bool IsGatherScatter,
                                              TTI::TargetCostKind CostKind) {
    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(DataTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(DataTy);
    // Assume the target does not have support for gather/scatter operations
    // and provide a rough estimate.
    //
    // First, compute the cost of the individual memory operations.
    InstructionCost AddrExtractCost =
        IsGatherScatter
            ? getVectorInstrCost(Instruction::ExtractElement,
                                 FixedVectorType::get(
                                     PointerType::get(VT->getElementType(), 0),
                                     VT->getNumElements()),
                                 CostKind, -1, nullptr, nullptr)
            : 0;
    InstructionCost LoadCost =
        VT->getNumElements() *
        (AddrExtractCost +
         getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));

    // Next, compute the cost of packing the result in a vector.
    InstructionCost PackingCost =
        getScalarizationOverhead(VT, Opcode != Instruction::Store,
                                 Opcode == Instruction::Store, CostKind);

    InstructionCost ConditionalCost = 0;
    if (VariableMask) {
      // Compute the cost of conditionally executing the memory operations with
      // variable masks. This includes extracting the individual conditions,
      // plus the branches and PHIs needed to combine the results.
      // NOTE: Estimating the cost of conditionally executing the memory
      // operations accurately is quite difficult and the current solution
      // provides a very rough estimate only.
      ConditionalCost =
          VT->getNumElements() *
          (getVectorInstrCost(
               Instruction::ExtractElement,
               FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
                                    VT->getNumElements()),
               CostKind, -1, nullptr, nullptr) +
           getCFInstrCost(Instruction::Br, CostKind) +
           getCFInstrCost(Instruction::PHI, CostKind));
    }

    return LoadCost + PackingCost + ConditionalCost;
  }
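
  // Worked example (illustrative): a masked load of <4 x i32> with a variable
  // mask is modeled as 4 scalar loads (LoadCost), 4 inserts to rebuild the
  // result (PackingCost), and, per lane, an i1 extract, a branch, and a PHI
  // (ConditionalCost).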

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
  virtual ~BasicTTIImplBase() = default;

  using TargetTransformInfoImplBase::DL;

public:
  /// \name Scalar TTI Implementations
  /// @{
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      unsigned *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }

  bool hasBranchDivergence() { return false; }

  bool useGPUDivergenceAnalysis() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
  }

  bool isSingleThreaded() const {
    return getTLI()->getTargetMachine().Options.ThreadModel ==
           ThreadModel::Single;
  }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
      auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);
      EVT VT = getTLI()->getValueType(DL, SrcTy);
      if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
          getTLI()->isOperationCustom(ISD::STORE, VT))
        return true;

      EVT ValVT =
          getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2));
      EVT LegalizedVT =
          getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT);
      return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT);
    };
    while (VF > 2 && IsSupportedByTarget(VF))
      VF /= 2;
    return VF;
  }
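
  // Worked example (illustrative): starting from VF = 8, the loop keeps
  // halving while a store of VF / 2 elements is still legal or custom (or a
  // legal truncating store exists); if 4- and 2-element stores are supported,
  // the search stops at the minimum VF of 2.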

  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  bool isNumRegsMajorCostOfLSR() {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }

  bool isProfitableLSRChainElement(Instruction *I) {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
      return 0;
    return -1;
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  unsigned getRegUsageForType(Type *Ty) {
    EVT ETy = getTLI()->getValueType(DL, Ty);
    return getTLI()->getNumRegisters(Ty->getContext(), ETy);
  }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// numbers found in lowering. This function ignores switches that are
    /// lowered with a mix of jump table / bit test / BTree. It was initially
    /// intended for estimating the cost of a switch in the inline cost
    /// heuristic, but it's a generic cost model to be used in other places
    /// (e.g., in loop unrolling).
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if both a jump table and bit test are not allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test.
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether the range of clusters is dense enough for a jump table.
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }
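
  // Worked example (illustrative): a switch with 10 cases on values 0..9 has
  // Range = 9 - 0 + 1 = 10; if the target deems that dense enough for a jump
  // table, this returns a single cluster and sets JumpTableSize to 10.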

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool shouldBuildRelLookupTables() const {
    const TargetMachine &TM = getTLI()->getTargetMachine();
    // If non-PIC mode, do not generate a relative lookup table.
    if (!TM.isPositionIndependent())
      return false;

    /// Relative lookup table entries consist of 32-bit offsets.
    /// Do not generate relative lookup tables for large code models
    /// in 64-bit architectures where 32-bit offsets might not be enough.
    if (TM.getCodeModel() == CodeModel::Medium ||
        TM.getCodeModel() == CodeModel::Large)
      return false;

    Triple TargetTriple = TM.getTargetTriple();
    if (!TargetTriple.isArch64Bit())
      return false;

    // TODO: Triggers issues on aarch64 on darwin, so temporarily disable it
    // there.
    if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin())
      return false;

    return true;
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }

  InstructionCost getFPOpCost(Type *Ty) {
    // Check whether FADD is available, as a proxy for floating-point in
    // general.
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
      return TargetTransformInfo::TCC_Basic;
    return TargetTransformInfo::TCC_Expensive;
  }

  unsigned getInliningThresholdMultiplier() { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }

  int getInlinerVectorBonusPercent() { return 150; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches.
    //  - The loop must have less than 40 uops in all executed loop branches.

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (BasicBlock *BB : L->blocks()) {
      for (Instruction &I : *BB) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
          if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
            if (!thisT()->isLoweredToCall(F))
              continue;
          }

          if (ORE) {
            ORE->emit([&]() {
              return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
                                        L->getHeader())
                     << "advising against unrolling the loop because it "
                        "contains a "
                     << ore::NV("Call", &I);
            });
          }
          return;
        }
      }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using trip count upper bound to unroll loops.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set number of instructions optimized when "back edge"
    // becomes "fall through" to default value of 2.
    UP.BEInsns = 2;
  }

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) {
    PP.PeelCount = 0;
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI) {
    return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
  }

  TailFoldingStyle
  getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) {
    return BaseT::getPreferredTailFoldingStyle(IVUpdateMayOverflow);
  }

  std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                                    IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

  std::optional<Value *>
  simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
                                   APInt DemandedMask, KnownBits &Known,
                                   bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

  std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) {
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }

  virtual std::optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return std::optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual std::optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    std::optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
      return TargetResult;

    return BaseT::getCacheAssociativity(Level);
  }

  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }

  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }

  virtual bool enableWritePrefetching() const {
    return getST()->enableWritePrefetching();
  }

  virtual bool shouldPrefetchAddressSpace(unsigned AS) const {
    return getST()->shouldPrefetchAddressSpace(AS);
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
  std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
  InstructionCost getScalarizationOverhead(VectorType *InTy,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract,
                                           TTI::TargetCostKind CostKind) {
    /// FIXME: a bitfield is not a reasonable abstraction for talking about
    /// which elements are needed from a scalable vector
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           "Vector size mismatch");

    InstructionCost Cost = 0;

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty,
                                            CostKind, i, nullptr, nullptr);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
                                            CostKind, i, nullptr, nullptr);
    }

    return Cost;
  }
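
  // Worked example (illustrative): for a <4 x i32> with DemandedElts = 0b0101
  // and Insert set, only elements 0 and 2 are demanded, so the estimate is
  // the cost of two insertelement operations.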

  /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
  InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
                                           bool Extract,
                                           TTI::TargetCostKind CostKind) {
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);

    APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
                                             CostKind);
  }

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
  InstructionCost
  getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                   ArrayRef<Type *> Tys,
                                   TTI::TargetCostKind CostKind) {
    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");

    InstructionCost Cost = 0;
    SmallPtrSet<const Value *, 4> UniqueOperands;
    for (int I = 0, E = Args.size(); I != E; I++) {
      // Disregard things like metadata arguments.
      const Value *A = Args[I];
      Type *Ty = Tys[I];
      if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
          !Ty->isPtrOrPtrVectorTy())
        continue;

      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
          Cost += getScalarizationOverhead(VecTy, /*Insert*/ false,
                                           /*Extract*/ true, CostKind);
      }
    }

    return Cost;
  }

  /// Estimate the overhead of scalarizing the inputs and outputs of an
  /// instruction, with return type RetTy and arguments Args of type Tys. If
  /// Args are unknown (empty), then the cost associated with one argument is
  /// added as a heuristic.
  InstructionCost getScalarizationOverhead(VectorType *RetTy,
                                           ArrayRef<const Value *> Args,
                                           ArrayRef<Type *> Tys,
                                           TTI::TargetCostKind CostKind) {
    InstructionCost Cost = getScalarizationOverhead(
        RetTy, /*Insert*/ true, /*Extract*/ false, CostKind);
    if (!Args.empty())
      Cost += getOperandsScalarizationOverhead(Args, Tys, CostKind);
    else
      // When no information on arguments is provided, we add the cost
      // associated with one argument as a heuristic.
      Cost += getScalarizationOverhead(RetTy, /*Insert*/ false,
                                       /*Extract*/ true, CostKind);

    return Cost;
  }

  /// Estimate the cost of type-legalization and the legalized type.
  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const {
    LLVMContext &C = Ty->getContext();
    EVT MTy = getTLI()->getValueType(DL, Ty);

    InstructionCost Cost = 1;
    // We keep legalizing the type until we find a legal kind. We assume that
    // the only operation that costs anything is the split. After splitting
    // we need to handle two types.
    while (true) {
      TargetLoweringBase::LegalizeKind LK = getTLI()->getTypeConversion(C, MTy);

      if (LK.first == TargetLoweringBase::TypeScalarizeScalableVector) {
        // Ensure we return a sensible simple VT here, since many callers of
        // this function require it.
        MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
        return std::make_pair(InstructionCost::getInvalid(), VT);
      }

      if (LK.first == TargetLoweringBase::TypeLegal)
        return std::make_pair(Cost, MTy.getSimpleVT());

      if (LK.first == TargetLoweringBase::TypeSplitVector ||
          LK.first == TargetLoweringBase::TypeExpandInteger)
        Cost *= 2;

      // Do not loop with f128 type.
      if (MTy == LK.second)
        return std::make_pair(Cost, MTy.getSimpleVT());

      // Keep legalizing the type.
      MTy = LK.second;
    }
  }
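
  // Worked example (illustrative): on a target whose widest legal vector is
  // v2i64, <8 x i64> splits twice (<8 x i64> -> 2 x <4 x i64> -> 4 x
  // <2 x i64>), doubling Cost at each step, so this returns {4, v2i64}.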

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle more cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                           Opd1Info, Opd2Info,
                                           Args, CxtI);

    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    InstructionCost OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is twice
      // as expensive.
      return LT.first * 2 * OpCost;
    }

    // An 'Expand' of URem and SRem is special because it may default
    // to expanding the operation into a sequence of sub-operations
    // i.e. X % Y -> X-(X/Y)*Y.
    if (ISD == ISD::UREM || ISD == ISD::SREM) {
      bool IsSigned = ISD == ISD::SREM;
      if (TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
                                        LT.second) ||
          TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV,
                                        LT.second)) {
        unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
        InstructionCost DivCost = thisT()->getArithmeticInstrCost(
            DivOpc, Ty, CostKind, Opd1Info, Opd2Info);
        InstructionCost MulCost =
            thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
        InstructionCost SubCost =
            thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
        return DivCost + MulCost + SubCost;
      }
    }

    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    // Else, assume that we need to scalarize this op.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      InstructionCost Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
          Args, CxtI);
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      SmallVector<Type *> Tys(Args.size(), Ty);
      return getScalarizationOverhead(VTy, Args, Tys, CostKind) +
             VTy->getNumElements() * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }
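
  // Worked example (illustrative): if UREM must expand but UDIV is legal,
  // i32 urem is costed as udiv + mul + sub, i.e. roughly 3 with unit costs,
  // matching the X-(X/Y)*Y expansion above.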

  TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
                                              ArrayRef<int> Mask) const {
    int Limit = Mask.size() * 2;
    if (Mask.empty() ||
        // Extra check required by isSingleSourceMaskImpl function (called by
        // ShuffleVectorInst::isSingleSourceMask).
        any_of(Mask, [Limit](int I) { return I >= Limit; }))
      return Kind;
    int Index;
    switch (Kind) {
    case TTI::SK_PermuteSingleSrc:
      if (ShuffleVectorInst::isReverseMask(Mask))
        return TTI::SK_Reverse;
      if (ShuffleVectorInst::isZeroEltSplatMask(Mask))
        return TTI::SK_Broadcast;
      break;
    case TTI::SK_PermuteTwoSrc:
      if (ShuffleVectorInst::isSelectMask(Mask))
        return TTI::SK_Select;
      if (ShuffleVectorInst::isTransposeMask(Mask))
        return TTI::SK_Transpose;
      if (ShuffleVectorInst::isSpliceMask(Mask, Index))
        return TTI::SK_Splice;
      break;
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Broadcast:
    case TTI::SK_Transpose:
    case TTI::SK_InsertSubvector:
    case TTI::SK_ExtractSubvector:
    case TTI::SK_Splice:
      break;
    }
    return Kind;
  }
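
  // Worked example (illustrative): for a single-source shuffle of <4 x i32>,
  // mask <3,2,1,0> is recognized as TTI::SK_Reverse and mask <0,0,0,0> as
  // TTI::SK_Broadcast, so targets can price these patterns more precisely
  // than a generic permute.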

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = std::nullopt) {

    switch (improveShuffleKindFromMask(Kind, Mask)) {
    case TTI::SK_Broadcast:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getBroadcastShuffleOverhead(FVT, CostKind);
      return InstructionCost::getInvalid();
    case TTI::SK_Select:
    case TTI::SK_Splice:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getPermuteShuffleOverhead(FVT, CostKind);
      return InstructionCost::getInvalid();
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, CostKind, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, CostKind, Index,
                                        cast<FixedVectorType>(SubTp));
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr) {
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      [[fallthrough]];
    case Instruction::BitCast:
      // Bitcasts between types that are legalized to the same type are free,
      // and assume int to/from ptr of the same size is also free.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      [[fallthrough]];
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;

      // If this is a zext/sext of a load, return 0 if the corresponding
      // extending load exists on target and the result type is legal.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
            ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
        if (DstLT.first == SrcLT.first &&
            TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
          return 0;
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are legalizing by splitting, query the concrete TTI for the cost
      // of casting the original vector twice. We also need to factor in the
      // cost of the split itself. Count that as 1, to be consistent with
      // getTypeLegalizationCost().
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // Scalarization cost is Invalid, can't assume any num elements.
      if (isa<ScalableVectorType>(DstVTy))
        return InstructionCost::getInvalid();

      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(DstVTy, /*Insert*/ true, /*Extract*/ true,
                                      CostKind) +
             Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcast between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, /*Insert*/ false,
                                                /*Extract*/ true, CostKind)
                     : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, /*Insert*/ true,
                                                /*Extract*/ false, CostKind)
                     : 0);
    }

    llvm_unreachable("Unhandled cast");
  }
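
  // Worked example (illustrative): when source and destination legalize to
  // the same number of same-sized registers (e.g. a <4 x i1> mask
  // zero-extended to <4 x i32> on a target that promotes the mask to
  // <4 x i32>), zext is assumed to lower to an AND and costs SrcLT.first,
  // while sext is assumed to need a SHL + SRA pair and costs twice that.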

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index) {
    TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       CostKind, Index, nullptr, nullptr) +
           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                     TTI::CastContextHint::None, CostKind);
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) {
    return BaseT::getCFInstrCost(Opcode, CostKind, I);
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle other cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       I);

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the cast is scalarized.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      if (isa<ScalableVectorType>(ValTy))
        return InstructionCost::getInvalid();

      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      InstructionCost Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValVTy, /*Insert*/ true,
                                      /*Extract*/ false, CostKind) +
             Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }
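
  // Worked example (illustrative): a compare or select whose legalized type
  // is supported is costed at LT.first; a <4 x i32> select whose VSELECT must
  // expand is instead costed as 4 scalar selects plus the insertion overhead
  // of the results.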

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index, Value *Op0, Value *Op1) {
    return getRegUsageForType(Val->getScalarType());
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     TTI::TargetCostKind CostKind,
                                     unsigned Index) {
    Value *Op0 = nullptr;
    Value *Op1 = nullptr;
    if (auto *IE = dyn_cast<InsertElementInst>(&I)) {
      Op0 = IE->getOperand(0);
      Op1 = IE->getOperand(1);
    }
    return thisT()->getVectorInstrCost(I.getOpcode(), Val, CostKind, Index, Op0,
                                       Op1);
  }

  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind) {
    assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
           "Unexpected size of DemandedDstElts.");

    InstructionCost Cost = 0;

    auto *SrcVT = FixedVectorType::get(EltTy, VF);
    auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor);

    // The Mask shuffling cost is extracting all the elements of the Mask
    // and inserting each of them Factor times into the wide vector:
    //
    // E.g. an interleaved group with factor 3:
    //   %mask = icmp ult <8 x i32> %vec1, %vec2
    //   %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
    //     <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
    // The cost is estimated as extracting all mask elements from the <8 x i1>
    // mask vector and inserting them factor times into the <24 x i1> shuffled
    // mask vector.
    APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
    Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
                                              /*Insert*/ false,
                                              /*Extract*/ true, CostKind);
    Cost += thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
                                              /*Insert*/ true,
                                              /*Extract*/ false, CostKind);

    return Cost;
  }
  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue,
                                                  TTI::OP_None},
                  const Instruction *I = nullptr) {
    assert(!Src->isVoidTy() && "Invalid type");
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);

    // Assuming that all loads of legal types cost 1.
    InstructionCost Cost = LT.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    const DataLayout &DL = this->getDataLayout();
    if (Src->isVectorTy() &&
        // In practice it's not currently possible to have a change in lane
        // length for extending loads or truncating stores so both types should
        // have the same scalable property.
        TypeSize::isKnownLT(DL.getTypeStoreSizeInBits(Src),
                            LT.second.getSizeInBits())) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src);
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(
            cast<VectorType>(Src), Opcode != Instruction::Store,
            Opcode == Instruction::Store, CostKind);
      }
    }

    return Cost;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
                                       CostKind);
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
                                       true, CostKind);
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {

    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(VecTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);

    // First, the cost of the load/store operation.
    InstructionCost Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalize the vector type, and get the legalized and unlegalized type
    // sizes.
    MVT VecTyLT = getTypeLegalizationCost(VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

    // Scale the cost of the memory operation by the fraction of legalized
    // instructions that will actually be used. We shouldn't account for the
    // cost of dead instructions since they will be removed.
    //
    // E.g., An interleaved load of factor 8:
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
    //       %v0 = shufflevector %vec, undef, <0, 8>
    //
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
    // type). The other loads are unused.
    //
    // TODO: Note that legalization can turn masked loads/stores into unmasked
    // (legalized) loads/stores. This can be reflected in the cost.
    if (Cost.isValid() && VecTySize > VecTyLTSize) {
      // The number of loads of a legal type it will take to represent a load
      // of the unlegalized vector type.
      unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);

      // The number of elements of the unlegalized type that correspond to a
      // single legal instruction.
      unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);

      // Determine which legal instructions will be used.
      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

      // Scale the cost of the load by the fraction of legal instructions that
      // will be used.
      Cost = divideCeil(UsedInsts.count() * *Cost.getValue(), NumLegalInsts);
    }

    // Then add the cost of the interleave operation.
    assert(Indices.size() <= Factor &&
           "Interleaved memory op has too many members");

    const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
    const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);

    APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
    for (unsigned Index : Indices) {
      assert(Index < Factor && "Invalid index for interleaved memory op");
      for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
        DemandedLoadStoreElts.setBit(Index + Elm * Factor);
    }

    if (Opcode == Instruction::Load) {
      // The interleave cost is similar to extracting sub vectors' elements
      // from the wide vector, and inserting them into sub vectors.
      //
      // E.g. An interleaved load of factor 2 (with one member of index 0):
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>   ; Index 0
      // The cost is estimated as extracting elements at 0, 2, 4, 6 from the
      // <8 x i32> vector and inserting them into a <4 x i32> vector.
      InstructionCost InsSubCost = thisT()->getScalarizationOverhead(
          SubVT, DemandedAllSubElts,
          /*Insert*/ true, /*Extract*/ false, CostKind);
      Cost += Indices.size() * InsSubCost;
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                                /*Insert*/ false,
                                                /*Extract*/ true, CostKind);
    } else {
      // The interleave cost is extracting elements from sub vectors, and
      // inserting them into the wide vector.
      //
      // E.g. An interleaved store of factor 3 with 2 members at indices 0,1
      // (using VF=4):
      //   %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
      //   %gaps.mask = <true, true, false, true, true, false,
      //                 true, true, false, true, true, false>
      //   call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
      //                          i32 Align, <12 x i1> %gaps.mask
      // The cost is estimated as extracting all elements (of actual members,
      // excluding gaps) from both <4 x i32> vectors and inserting into the
      // <12 x i32> vector.
      InstructionCost ExtSubCost = thisT()->getScalarizationOverhead(
          SubVT, DemandedAllSubElts,
          /*Insert*/ false, /*Extract*/ true, CostKind);
      Cost += ExtSubCost * Indices.size();
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                                /*Insert*/ true,
                                                /*Extract*/ false, CostKind);
    }

    if (!UseMaskForCond)
      return Cost;

    Type *I8Type = Type::getInt8Ty(VT->getContext());

    Cost += thisT()->getReplicationShuffleCost(
        I8Type, Factor, NumSubElts,
        UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
        CostKind);

    // The Gaps mask is invariant and created outside the loop, therefore the
    // cost of creating it is not accounted for here. However, if we have both
    // a MaskForGaps and some other mask that guards the execution of the
    // memory access, we need to account for the cost of And-ing the two masks
    // inside the loop.
    if (UseMaskForGaps) {
      auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                              CostKind);
    }

    return Cost;
  }
1475  /// Get intrinsic cost based on arguments.
1478  // Check for generically free intrinsics.
1479  if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
1480  return 0;
1481 
1482  // Assume that target intrinsics are cheap.
1483  Intrinsic::ID IID = ICA.getID();
1484  if (Function::isTargetIntrinsic(IID))
1486 
1487  if (ICA.isTypeBasedOnly())
1489 
1490  Type *RetTy = ICA.getReturnType();
1491 
1492  ElementCount RetVF =
1493  (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
1494  : ElementCount::getFixed(1));
1495  const IntrinsicInst *I = ICA.getInst();
1497  FastMathFlags FMF = ICA.getFlags();
1498  switch (IID) {
1499  default:
1500  break;
1501 
1502  case Intrinsic::powi:
1503  if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
1504  bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize();
1505  if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(),
1506  ShouldOptForSize)) {
1507  // The cost is modeled on the expansion performed by ExpandPowI in
1508  // SelectionDAGBuilder.
1509  APInt Exponent = RHSC->getValue().abs();
1510  unsigned ActiveBits = Exponent.getActiveBits();
1511  unsigned PopCount = Exponent.countPopulation();
1512  InstructionCost Cost = (ActiveBits + PopCount - 2) *
1513  thisT()->getArithmeticInstrCost(
1514  Instruction::FMul, RetTy, CostKind);
1515  if (RHSC->getSExtValue() < 0)
1516  Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
1517  CostKind);
1518  return Cost;
1519  }
1520  }
1521  break;
1522  case Intrinsic::cttz:
1523  // FIXME: If necessary, this should go in target-specific overrides.
1524  if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz(RetTy))
1526  break;
1527 
1528  case Intrinsic::ctlz:
1529  // FIXME: If necessary, this should go in target-specific overrides.
1530  if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz(RetTy))
1532  break;
1533 
1534  case Intrinsic::memcpy:
1535  return thisT()->getMemcpyCost(ICA.getInst());
1536 
1537  case Intrinsic::masked_scatter: {
1538  const Value *Mask = Args[3];
1539  bool VarMask = !isa<Constant>(Mask);
1540  Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
1541  return thisT()->getGatherScatterOpCost(Instruction::Store,
1542  ICA.getArgTypes()[0], Args[1],
1543  VarMask, Alignment, CostKind, I);
1544  }
1545  case Intrinsic::masked_gather: {
1546  const Value *Mask = Args[2];
1547  bool VarMask = !isa<Constant>(Mask);
1548  Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
1549  return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
1550  VarMask, Alignment, CostKind, I);
1551  }
1552  case Intrinsic::experimental_stepvector: {
1553  if (isa<ScalableVectorType>(RetTy))
1555  // The cost of materialising a constant integer vector.
1557  }
1558  case Intrinsic::vector_extract: {
1559  // FIXME: Handle case where a scalable vector is extracted from a scalable
1560  // vector
1561  if (isa<ScalableVectorType>(RetTy))
1563  unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
1564  return thisT()->getShuffleCost(
1565  TTI::SK_ExtractSubvector, cast<VectorType>(Args[0]->getType()),
1566  std::nullopt, CostKind, Index, cast<VectorType>(RetTy));
1567  }
1568  case Intrinsic::vector_insert: {
1569  // FIXME: Handle case where a scalable vector is inserted into a scalable
1570  // vector
1571  if (isa<ScalableVectorType>(Args[1]->getType()))
1573  unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1574  return thisT()->getShuffleCost(
1575  TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()),
1576  std::nullopt, CostKind, Index, cast<VectorType>(Args[1]->getType()));
1577  }
1578  case Intrinsic::experimental_vector_reverse: {
1579  return thisT()->getShuffleCost(
1580  TTI::SK_Reverse, cast<VectorType>(Args[0]->getType()), std::nullopt,
1581  CostKind, 0, cast<VectorType>(RetTy));
1582  }
1583  case Intrinsic::experimental_vector_splice: {
1584  unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
1585  return thisT()->getShuffleCost(
1586  TTI::SK_Splice, cast<VectorType>(Args[0]->getType()), std::nullopt,
1587  CostKind, Index, cast<VectorType>(RetTy));
1588  }
1589  case Intrinsic::vector_reduce_add:
1590  case Intrinsic::vector_reduce_mul:
1591  case Intrinsic::vector_reduce_and:
1592  case Intrinsic::vector_reduce_or:
1593  case Intrinsic::vector_reduce_xor:
1594  case Intrinsic::vector_reduce_smax:
1595  case Intrinsic::vector_reduce_smin:
1596  case Intrinsic::vector_reduce_fmax:
1597  case Intrinsic::vector_reduce_fmin:
1598  case Intrinsic::vector_reduce_umax:
1599  case Intrinsic::vector_reduce_umin: {
1600  IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
1602  }
1603  case Intrinsic::vector_reduce_fadd:
1604  case Intrinsic::vector_reduce_fmul: {
1606  IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
1608  }
1609  case Intrinsic::fshl:
1610  case Intrinsic::fshr: {
1611  const Value *X = Args[0];
1612  const Value *Y = Args[1];
1613  const Value *Z = Args[2];
1614  const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X);
1615  const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y);
1616  const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);
1617  const TTI::OperandValueInfo OpInfoBW =
1620  : TTI::OP_None};
1621 
1622  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1623  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1624  InstructionCost Cost = 0;
1625  Cost +=
1626  thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
1627  Cost +=
1628  thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
1629  Cost += thisT()->getArithmeticInstrCost(
1630  BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
1631  {OpInfoZ.Kind, TTI::OP_None});
1632  Cost += thisT()->getArithmeticInstrCost(
1633  BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
1634  {OpInfoZ.Kind, TTI::OP_None});
1635  // Non-constant shift amounts requires a modulo.
1636  if (!OpInfoZ.isConstant())
1637  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1638  CostKind, OpInfoZ, OpInfoBW);
1639  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1640  if (X != Y) {
1641  Type *CondTy = RetTy->getWithNewBitWidth(1);
1642  Cost +=
1643  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1645  Cost +=
1646  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1648  }
1649  return Cost;
1650  }
1651  case Intrinsic::get_active_lane_mask: {
1652  EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
1653  EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);
1654 
1655  // If we're not expanding the intrinsic then we assume this is cheap
1656  // to implement.
1657  if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
1658  return getTypeLegalizationCost(RetTy).first;
1659  }
1660 
1661  // Create the expanded types that will be used to calculate the uadd_sat
1662  // operation.
1663  Type *ExpRetTy = VectorType::get(
1664  ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
1665  IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
1667  thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
1668  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
1670  return Cost;
1671  }
1672  }
1673 
1674  // Assume that we need to scalarize this intrinsic.
1675  // Compute the scalarization overhead based on Args for a vector
1676  // intrinsic.
1677  InstructionCost ScalarizationCost = InstructionCost::getInvalid();
1678  if (RetVF.isVector() && !RetVF.isScalable()) {
1679  ScalarizationCost = 0;
1680  if (!RetTy->isVoidTy())
1681  ScalarizationCost += getScalarizationOverhead(
1682  cast<VectorType>(RetTy),
1683  /*Insert*/ true, /*Extract*/ false, CostKind);
1684  ScalarizationCost +=
1685  getOperandsScalarizationOverhead(Args, ICA.getArgTypes(), CostKind);
1686  }
1687 
1688  IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
1689  ScalarizationCost);
1690  return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
1691  }
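  // A minimal usage sketch (hypothetical caller; `TTIImpl` and `Ctx` are
  // assumed names, not part of this header): cost a <4 x i32> umin using the
  // type-based IntrinsicCostAttributes form that appears throughout this
  // file.
  //   Type *VTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  //   IntrinsicCostAttributes ICA(Intrinsic::umin, VTy, {VTy, VTy});
  //   InstructionCost C =
  //       TTIImpl.getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);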
1692 
1693  /// Get intrinsic cost based on argument types.
1694  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1695  /// cost of scalarizing the arguments and the return value will be computed
1696  /// based on types.
1697  InstructionCost
1698  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1699  TTI::TargetCostKind CostKind) {
1700  Intrinsic::ID IID = ICA.getID();
1701  Type *RetTy = ICA.getReturnType();
1702  const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
1703  FastMathFlags FMF = ICA.getFlags();
1704  InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost();
1705  bool SkipScalarizationCost = ICA.skipScalarizationCost();
1706 
1707  VectorType *VecOpTy = nullptr;
1708  if (!Tys.empty()) {
1709  // The vector reduction operand is operand 0 except for fadd/fmul.
1710  // Their operand 0 is a scalar start value, so the vector op is operand 1.
1711  unsigned VecTyIndex = 0;
1712  if (IID == Intrinsic::vector_reduce_fadd ||
1713  IID == Intrinsic::vector_reduce_fmul)
1714  VecTyIndex = 1;
1715  assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
1716  VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
1717  }
1718 
1719  // Library call cost - other than size, make it expensive.
1720  unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
1721  unsigned ISD = 0;
1722  switch (IID) {
1723  default: {
1724  // Scalable vectors cannot be scalarized, so return Invalid.
1725  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
1726  return isa<ScalableVectorType>(Ty);
1727  }))
1728  return InstructionCost::getInvalid();
1729 
1730  // Assume that we need to scalarize this intrinsic.
1731  InstructionCost ScalarizationCost =
1732  SkipScalarizationCost ? ScalarizationCostPassed : 0;
1733  unsigned ScalarCalls = 1;
1734  Type *ScalarRetTy = RetTy;
1735  if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1736  if (!SkipScalarizationCost)
1737  ScalarizationCost = getScalarizationOverhead(
1738  RetVTy, /*Insert*/ true, /*Extract*/ false, CostKind);
1739  ScalarCalls = std::max(ScalarCalls,
1740  cast<FixedVectorType>(RetVTy)->getNumElements());
1741  ScalarRetTy = RetTy->getScalarType();
1742  }
1743  SmallVector<Type *, 4> ScalarTys;
1744  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1745  Type *Ty = Tys[i];
1746  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1747  if (!SkipScalarizationCost)
1748  ScalarizationCost += getScalarizationOverhead(
1749  VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
1750  ScalarCalls = std::max(ScalarCalls,
1751  cast<FixedVectorType>(VTy)->getNumElements());
1752  Ty = Ty->getScalarType();
1753  }
1754  ScalarTys.push_back(Ty);
1755  }
1756  if (ScalarCalls == 1)
1757  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1758 
1759  IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
1760  InstructionCost ScalarCost =
1761  thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
1762 
1763  return ScalarCalls * ScalarCost + ScalarizationCost;
1764  }
1765  // Look for intrinsics that can be lowered directly or turned into a scalar
1766  // intrinsic call.
1767  case Intrinsic::sqrt:
1768  ISD = ISD::FSQRT;
1769  break;
1770  case Intrinsic::sin:
1771  ISD = ISD::FSIN;
1772  break;
1773  case Intrinsic::cos:
1774  ISD = ISD::FCOS;
1775  break;
1776  case Intrinsic::exp:
1777  ISD = ISD::FEXP;
1778  break;
1779  case Intrinsic::exp2:
1780  ISD = ISD::FEXP2;
1781  break;
1782  case Intrinsic::log:
1783  ISD = ISD::FLOG;
1784  break;
1785  case Intrinsic::log10:
1786  ISD = ISD::FLOG10;
1787  break;
1788  case Intrinsic::log2:
1789  ISD = ISD::FLOG2;
1790  break;
1791  case Intrinsic::fabs:
1792  ISD = ISD::FABS;
1793  break;
1794  case Intrinsic::canonicalize:
1795  ISD = ISD::FCANONICALIZE;
1796  break;
1797  case Intrinsic::minnum:
1798  ISD = ISD::FMINNUM;
1799  break;
1800  case Intrinsic::maxnum:
1801  ISD = ISD::FMAXNUM;
1802  break;
1803  case Intrinsic::minimum:
1804  ISD = ISD::FMINIMUM;
1805  break;
1806  case Intrinsic::maximum:
1807  ISD = ISD::FMAXIMUM;
1808  break;
1809  case Intrinsic::copysign:
1810  ISD = ISD::FCOPYSIGN;
1811  break;
1812  case Intrinsic::floor:
1813  ISD = ISD::FFLOOR;
1814  break;
1815  case Intrinsic::ceil:
1816  ISD = ISD::FCEIL;
1817  break;
1818  case Intrinsic::trunc:
1819  ISD = ISD::FTRUNC;
1820  break;
1821  case Intrinsic::nearbyint:
1822  ISD = ISD::FNEARBYINT;
1823  break;
1824  case Intrinsic::rint:
1825  ISD = ISD::FRINT;
1826  break;
1827  case Intrinsic::round:
1828  ISD = ISD::FROUND;
1829  break;
1830  case Intrinsic::roundeven:
1831  ISD = ISD::FROUNDEVEN;
1832  break;
1833  case Intrinsic::pow:
1834  ISD = ISD::FPOW;
1835  break;
1836  case Intrinsic::fma:
1837  ISD = ISD::FMA;
1838  break;
1839  case Intrinsic::fmuladd:
1840  ISD = ISD::FMA;
1841  break;
1842  case Intrinsic::experimental_constrained_fmuladd:
1843  ISD = ISD::STRICT_FMA;
1844  break;
1845  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1846  case Intrinsic::lifetime_start:
1847  case Intrinsic::lifetime_end:
1848  case Intrinsic::sideeffect:
1849  case Intrinsic::pseudoprobe:
1850  case Intrinsic::arithmetic_fence:
1851  return 0;
1852  case Intrinsic::masked_store: {
1853  Type *Ty = Tys[0];
1854  Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1855  return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
1856  CostKind);
1857  }
1858  case Intrinsic::masked_load: {
1859  Type *Ty = RetTy;
1860  Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1861  return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
1862  CostKind);
1863  }
1864  case Intrinsic::vector_reduce_add:
1865  return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
1866  std::nullopt, CostKind);
1867  case Intrinsic::vector_reduce_mul:
1868  return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
1869  std::nullopt, CostKind);
1870  case Intrinsic::vector_reduce_and:
1871  return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
1872  std::nullopt, CostKind);
1873  case Intrinsic::vector_reduce_or:
1874  return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
1875  std::nullopt, CostKind);
1876  case Intrinsic::vector_reduce_xor:
1877  return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
1878  std::nullopt, CostKind);
1879  case Intrinsic::vector_reduce_fadd:
1880  return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
1881  FMF, CostKind);
1882  case Intrinsic::vector_reduce_fmul:
1883  return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
1884  FMF, CostKind);
1885  case Intrinsic::vector_reduce_smax:
1886  case Intrinsic::vector_reduce_smin:
1887  case Intrinsic::vector_reduce_fmax:
1888  case Intrinsic::vector_reduce_fmin:
1889  return thisT()->getMinMaxReductionCost(
1890  VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1891  /*IsUnsigned=*/false, CostKind);
1892  case Intrinsic::vector_reduce_umax:
1893  case Intrinsic::vector_reduce_umin:
1894  return thisT()->getMinMaxReductionCost(
1895  VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1896  /*IsUnsigned=*/true, CostKind);
1897  case Intrinsic::abs: {
1898  // abs(X) = select(icmp(X,0),X,sub(0,X))
1899  Type *CondTy = RetTy->getWithNewBitWidth(1);
1900  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1901  InstructionCost Cost = 0;
1902  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1903  Pred, CostKind);
1904  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1905  Pred, CostKind);
1906  // TODO: Should we add an OperandValueProperties::OP_Zero property?
1907  Cost += thisT()->getArithmeticInstrCost(
1908  BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None});
1909  return Cost;
1910  }
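  // Illustrative shape of the three costed pieces for i32 (names are for
  // exposition only):
  //   %neg = sub i32 0, %x
  //   %c   = icmp sgt i32 %x, 0
  //   %r   = select i1 %c, i32 %x, i32 %neg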
1911  case Intrinsic::smax:
1912  case Intrinsic::smin:
1913  case Intrinsic::umax:
1914  case Intrinsic::umin: {
1915  // minmax(X,Y) = select(icmp(X,Y),X,Y)
1916  Type *CondTy = RetTy->getWithNewBitWidth(1);
1917  bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
1918  CmpInst::Predicate Pred =
1919  IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
1920  InstructionCost Cost = 0;
1921  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1922  Pred, CostKind);
1923  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1924  Pred, CostKind);
1925  return Cost;
1926  }
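  // E.g. for umax on i32 the two costed pieces have the shape:
  //   %c = icmp ugt i32 %x, %y
  //   %r = select i1 %c, i32 %x, i32 %y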
1927  case Intrinsic::sadd_sat:
1928  case Intrinsic::ssub_sat: {
1929  Type *CondTy = RetTy->getWithNewBitWidth(1);
1930 
1931  Type *OpTy = StructType::create({RetTy, CondTy});
1932  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1933  ? Intrinsic::sadd_with_overflow
1934  : Intrinsic::ssub_with_overflow;
1935  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1936 
1937  // SatMax -> Overflow && SumDiff < 0
1938  // SatMin -> Overflow && SumDiff >= 0
1939  InstructionCost Cost = 0;
1940  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1941  nullptr, ScalarizationCostPassed);
1942  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1943  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1944  Pred, CostKind);
1945  Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1946  CondTy, Pred, CostKind);
1947  return Cost;
1948  }
1949  case Intrinsic::uadd_sat:
1950  case Intrinsic::usub_sat: {
1951  Type *CondTy = RetTy->getWithNewBitWidth(1);
1952 
1953  Type *OpTy = StructType::create({RetTy, CondTy});
1954  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1955  ? Intrinsic::uadd_with_overflow
1956  : Intrinsic::usub_with_overflow;
1957 
1958  InstructionCost Cost = 0;
1959  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1960  nullptr, ScalarizationCostPassed);
1961  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1962  Cost +=
1963  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1964  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1965  return Cost;
1966  }
1967  case Intrinsic::smul_fix:
1968  case Intrinsic::umul_fix: {
1969  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1970  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1971 
1972  unsigned ExtOp =
1973  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1974  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1975 
1976  InstructionCost Cost = 0;
1977  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
1978  Cost +=
1979  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1980  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1981  CCH, CostKind);
1982  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1983  CostKind,
1984  {TTI::OK_AnyValue, TTI::OP_None},
1985  {TTI::OK_UniformConstantValue, TTI::OP_None});
1986  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1987  {TTI::OK_AnyValue, TTI::OP_None},
1988  {TTI::OK_UniformConstantValue, TTI::OP_None});
1989  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
1990  return Cost;
1991  }
1992  case Intrinsic::sadd_with_overflow:
1993  case Intrinsic::ssub_with_overflow: {
1994  Type *SumTy = RetTy->getContainedType(0);
1995  Type *OverflowTy = RetTy->getContainedType(1);
1996  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1997  ? BinaryOperator::Add
1998  : BinaryOperator::Sub;
1999 
2000  // Add:
2001  // Overflow -> (Result < LHS) ^ (RHS < 0)
2002  // Sub:
2003  // Overflow -> (Result < LHS) ^ (RHS > 0)
2004  InstructionCost Cost = 0;
2005  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2006  Cost += 2 * thisT()->getCmpSelInstrCost(
2007  Instruction::ICmp, SumTy, OverflowTy,
2008  CmpInst::ICMP_SGT, CostKind);
2009  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
2010  CostKind);
2011  return Cost;
2012  }
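  // Illustrative shape of the costed pieces for sadd.with.overflow on i32
  // (names are for exposition only):
  //   %s  = add i32 %lhs, %rhs
  //   %c1 = icmp slt i32 %s, %lhs           ; Result < LHS
  //   %c2 = icmp slt i32 %rhs, 0            ; RHS < 0
  //   %ov = xor i1 %c1, %c2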
2013  case Intrinsic::uadd_with_overflow:
2014  case Intrinsic::usub_with_overflow: {
2015  Type *SumTy = RetTy->getContainedType(0);
2016  Type *OverflowTy = RetTy->getContainedType(1);
2017  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
2018  ? BinaryOperator::Add
2019  : BinaryOperator::Sub;
2020  CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
2021  ? CmpInst::ICMP_ULT
2022  : CmpInst::ICMP_UGT;
2023 
2024  InstructionCost Cost = 0;
2025  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
2026  Cost +=
2027  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
2028  Pred, CostKind);
2029  return Cost;
2030  }
2031  case Intrinsic::smul_with_overflow:
2032  case Intrinsic::umul_with_overflow: {
2033  Type *MulTy = RetTy->getContainedType(0);
2034  Type *OverflowTy = RetTy->getContainedType(1);
2035  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
2036  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
2037  bool IsSigned = IID == Intrinsic::smul_with_overflow;
2038 
2039  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
2040  TTI::CastContextHint CCH = TTI::CastContextHint::None;
2041 
2042  InstructionCost Cost = 0;
2043  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
2044  Cost +=
2045  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2046  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
2047  CCH, CostKind);
2048  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
2049  CostKind,
2050  {TTI::OK_AnyValue, TTI::OP_None},
2051  {TTI::OK_UniformConstantValue, TTI::OP_None});
2052 
2053  if (IsSigned)
2054  Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
2055  CostKind,
2056  {TTI::OK_AnyValue, TTI::OP_None},
2057  {TTI::OK_UniformConstantValue, TTI::OP_None});
2058 
2059  Cost += thisT()->getCmpSelInstrCost(
2060  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
2061  return Cost;
2062  }
2063  case Intrinsic::fptosi_sat:
2064  case Intrinsic::fptoui_sat: {
2065  if (Tys.empty())
2066  break;
2067  Type *FromTy = Tys[0];
2068  bool IsSigned = IID == Intrinsic::fptosi_sat;
2069 
2070  InstructionCost Cost = 0;
2071  IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
2072  {FromTy, FromTy});
2073  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
2074  IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
2075  {FromTy, FromTy});
2076  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
2077  Cost += thisT()->getCastInstrCost(
2078  IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
2079  TTI::CastContextHint::None, CostKind);
2080  if (IsSigned) {
2081  Type *CondTy = RetTy->getWithNewBitWidth(1);
2082  Cost += thisT()->getCmpSelInstrCost(
2083  BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2084  Cost += thisT()->getCmpSelInstrCost(
2085  BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2086  }
2087  return Cost;
2088  }
2089  case Intrinsic::ctpop:
2090  ISD = ISD::CTPOP;
2091  // In case of legalization use TCC_Expensive. This is cheaper than a
2092  // library call but still not a cheap instruction.
2093  SingleCallCost = TargetTransformInfo::TCC_Expensive;
2094  break;
2095  case Intrinsic::ctlz:
2096  ISD = ISD::CTLZ;
2097  break;
2098  case Intrinsic::cttz:
2099  ISD = ISD::CTTZ;
2100  break;
2101  case Intrinsic::bswap:
2102  ISD = ISD::BSWAP;
2103  break;
2104  case Intrinsic::bitreverse:
2105  ISD = ISD::BITREVERSE;
2106  break;
2107  }
2108 
2109  const TargetLoweringBase *TLI = getTLI();
2110  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
2111 
2112  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
2113  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
2114  TLI->isFAbsFree(LT.second)) {
2115  return 0;
2116  }
2117 
2118  // The operation is legal. Assume it costs 1.
2119  // If the type is split to multiple registers, assume that there is some
2120  // overhead to this.
2121  // TODO: Once we have extract/insert subvector cost we need to use them.
2122  if (LT.first > 1)
2123  return (LT.first * 2);
2124  else
2125  return (LT.first * 1);
2126  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
2127  // If the operation is custom lowered then assume
2128  // that the code is twice as expensive.
2129  return (LT.first * 2);
2130  }
2131 
2132  // If we can't lower fmuladd into an FMA estimate the cost as a floating
2133  // point mul followed by an add.
2134  if (IID == Intrinsic::fmuladd)
2135  return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
2136  CostKind) +
2137  thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
2138  CostKind);
2139  if (IID == Intrinsic::experimental_constrained_fmuladd) {
2140  IntrinsicCostAttributes FMulAttrs(
2141  Intrinsic::experimental_constrained_fmul, RetTy, Tys);
2142  IntrinsicCostAttributes FAddAttrs(
2143  Intrinsic::experimental_constrained_fadd, RetTy, Tys);
2144  return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
2145  thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
2146  }
2147 
2148  // Else, assume that we need to scalarize this intrinsic. For math builtins
2149  // this will emit a costly libcall, adding call overhead and spills. Make it
2150  // very expensive.
2151  if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2152  // Scalable vectors cannot be scalarized, so return Invalid.
2153  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2154  return isa<ScalableVectorType>(Ty);
2155  }))
2156  return InstructionCost::getInvalid();
2157 
2158  InstructionCost ScalarizationCost =
2159  SkipScalarizationCost
2160  ? ScalarizationCostPassed
2161  : getScalarizationOverhead(RetVTy, /*Insert*/ true,
2162  /*Extract*/ false, CostKind);
2163 
2164  unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2165  SmallVector<Type *, 4> ScalarTys;
2166  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
2167  Type *Ty = Tys[i];
2168  if (Ty->isVectorTy())
2169  Ty = Ty->getScalarType();
2170  ScalarTys.push_back(Ty);
2171  }
2172  IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2173  InstructionCost ScalarCost =
2174  thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2175  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
2176  if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
2177  if (!ICA.skipScalarizationCost())
2178  ScalarizationCost += getScalarizationOverhead(
2179  VTy, /*Insert*/ false, /*Extract*/ true, CostKind);
2180  ScalarCalls = std::max(ScalarCalls,
2181  cast<FixedVectorType>(VTy)->getNumElements());
2182  }
2183  }
2184  return ScalarCalls * ScalarCost + ScalarizationCost;
2185  }
2186 
2187  // This is going to be turned into a library call, make it expensive.
2188  return SingleCallCost;
2189  }
2190 
2191  /// Compute a cost of the given call instruction.
2192  ///
2193  /// Compute the cost of calling function F with return type RetTy and
2194  /// argument types Tys. F might be nullptr, in this case the cost of an
2195  /// arbitrary call with the specified signature will be returned.
2196  /// This is used, for instance, when we estimate call of a vector
2197  /// counterpart of the given function.
2198  /// \param F Called function, might be nullptr.
2199  /// \param RetTy Return value types.
2200  /// \param Tys Argument types.
2201  /// \returns The cost of Call instruction.
2202  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2203  ArrayRef<Type *> Tys,
2204  TTI::TargetCostKind CostKind) {
2205  return 10;
2206  }
2207 
2208  unsigned getNumberOfParts(Type *Tp) {
2209  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
2210  return LT.first.isValid() ? *LT.first.getValue() : 0;
2211  }
2212 
2213  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
2214  const SCEV *) {
2215  return 0;
2216  }
2217 
2218  /// Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
2219  /// We're assuming that reduction operations are performed in the following way:
2220  ///
2221  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
2222  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n-1, i32 undef, ..., i32 undef>
2223  /// \----------------v-------------/ \----------v------------/
2224  /// n/2 elements n/2 elements
2225  /// %red1 = op <n x t> %val, <n x t> %val1
2226  /// After this operation we have a vector %red1 where only the first n/2
2227  /// elements are meaningful, the second n/2 elements are undefined and can be
2228  /// dropped. All other operations are actually working with the vector of
2229  /// length n/2, not n, though the real vector length is still n.
2230  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
2231  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2-1, i32 undef, ..., i32 undef>
2232  /// \----------------v-------------/ \----------v------------/
2233  /// n/4 elements 3*n/4 elements
2234  /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of
2235  /// length n/2, the resulting vector has length n/4 etc.
2236  ///
2237  /// The cost model should take into account that the actual length of the
2238  /// vector is reduced on each iteration.
2239  InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty,
2240  TTI::TargetCostKind CostKind) {
2241  // Targets must implement a default value for the scalable case, since
2242  // we don't know how many lanes the vector has.
2243  if (isa<ScalableVectorType>(Ty))
2244  return InstructionCost::getInvalid();
2245 
2246  Type *ScalarTy = Ty->getElementType();
2247  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2248  if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
2249  ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
2250  NumVecElts >= 2) {
2251  // Or reduction for i1 is represented as:
2252  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2253  // %res = cmp ne iReduxWidth %val, 0
2254  // And reduction for i1 is represented as:
2255  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2256  // %res = cmp eq iReduxWidth %val, 11111
2257  Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
2258  return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
2259  TTI::CastContextHint::None, CostKind) +
2260  thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
2261  CmpInst::makeCmpResultType(ValTy),
2262  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2263  }
2264  unsigned NumReduxLevels = Log2_32(NumVecElts);
2265  InstructionCost ArithCost = 0;
2266  InstructionCost ShuffleCost = 0;
2267  std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
2268  unsigned LongVectorCount = 0;
2269  unsigned MVTLen =
2270  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2271  while (NumVecElts > MVTLen) {
2272  NumVecElts /= 2;
2273  VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2274  ShuffleCost +=
2275  thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
2276  CostKind, NumVecElts, SubTy);
2277  ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
2278  Ty = SubTy;
2279  ++LongVectorCount;
2280  }
2281 
2282  NumReduxLevels -= LongVectorCount;
2283 
2284  // The minimal length of the vector is limited by the real length of vector
2285  // operations performed on the current platform. That's why several final
2286  // reduction operations are performed on the vectors with the same
2287  // architecture-dependent length.
2288 
2289  // By default reductions need one shuffle per reduction level.
2290  ShuffleCost +=
2291  NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2292  std::nullopt, CostKind, 0, Ty);
2293  ArithCost +=
2294  NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
2295  return ShuffleCost + ArithCost +
2296  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
2297  CostKind, 0, nullptr, nullptr);
2298  }
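  // Worked example (assuming an <8 x i32> add reduction whose type is
  // already legal, so the extract-subvector loop above is skipped):
  // Log2_32(8) = 3 shuffle+add levels plus a final extract, the first level
  // having the shape:
  //   %s1 = shufflevector <8 x i32> %v, <8 x i32> undef,
  //         <8 x i32> <i32 4, i32 5, i32 6, i32 7,
  //                    i32 undef, i32 undef, i32 undef, i32 undef>
  //   %r1 = add <8 x i32> %v, %s1    ; only lanes 0..3 remain meaningful
  // Two more halving levels leave the result in lane 0, which the final
  // extractelement reads back.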
2299 
2300  /// Try to calculate the cost of performing strict (in-order) reductions,
2301  /// which involves doing a sequence of floating point additions in lane
2302  /// order, starting with an initial value. For example, consider a scalar
2303  /// initial value 'InitVal' of type float and a vector of type <4 x float>:
2304  ///
2305  /// Vector = <float %v0, float %v1, float %v2, float %v3>
2306  ///
2307  /// %add1 = %InitVal + %v0
2308  /// %add2 = %add1 + %v1
2309  /// %add3 = %add2 + %v2
2310  /// %add4 = %add3 + %v3
2311  ///
2312  /// As a simple estimate we can say the cost of such a reduction is 4 times
2313  /// the cost of a scalar FP addition. We can only estimate the costs for
2314  /// fixed-width vectors here because for scalable vectors we do not know the
2315  /// runtime number of operations.
2316  InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty,
2317  TTI::TargetCostKind CostKind) {
2318  // Targets must implement a default value for the scalable case, since
2319  // we don't know how many lanes the vector has.
2320  if (isa<ScalableVectorType>(Ty))
2321  return InstructionCost::getInvalid();
2322 
2323  auto *VTy = cast<FixedVectorType>(Ty);
2324  InstructionCost ExtractCost = getScalarizationOverhead(
2325  VTy, /*Insert=*/false, /*Extract=*/true, CostKind);
2326  InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
2327  Opcode, VTy->getElementType(), CostKind);
2328  ArithCost *= VTy->getNumElements();
2329 
2330  return ExtractCost + ArithCost;
2331  }
2332 
2333  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2334  std::optional<FastMathFlags> FMF,
2335  TTI::TargetCostKind CostKind) {
2336  if (TTI::requiresOrderedReduction(FMF))
2337  return getOrderedReductionCost(Opcode, Ty, CostKind);
2338  return getTreeReductionCost(Opcode, Ty, CostKind);
2339  }
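  // For example (a sketch; `VTy` and `CostKind` are assumed to be in scope):
  // a reassociatable fadd reduction is costed via the shuffle tree, while
  // strict FMF forces the lane-by-lane ordered cost.
  //   InstructionCost Tree = getArithmeticReductionCost(
  //       Instruction::FAdd, VTy, FastMathFlags::getFast(), CostKind);
  //   InstructionCost Ordered = getArithmeticReductionCost(
  //       Instruction::FAdd, VTy, FastMathFlags(), CostKind);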
2340 
2341  /// Try to calculate op costs for min/max reduction operations.
2342  /// \param CondTy Conditional type for the Select instruction.
2343  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
2344  bool IsUnsigned,
2345  TTI::TargetCostKind CostKind) {
2346  // Targets must implement a default value for the scalable case, since
2347  // we don't know how many lanes the vector has.
2348  if (isa<ScalableVectorType>(Ty))
2349  return InstructionCost::getInvalid();
2350 
2351  Type *ScalarTy = Ty->getElementType();
2352  Type *ScalarCondTy = CondTy->getElementType();
2353  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2354  unsigned NumReduxLevels = Log2_32(NumVecElts);
2355  unsigned CmpOpcode;
2356  if (Ty->isFPOrFPVectorTy()) {
2357  CmpOpcode = Instruction::FCmp;
2358  } else {
2359  assert(Ty->isIntOrIntVectorTy() &&
2360  "expecting floating point or integer type for min/max reduction");
2361  CmpOpcode = Instruction::ICmp;
2362  }
2363  InstructionCost MinMaxCost = 0;
2364  InstructionCost ShuffleCost = 0;
2365  std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
2366  unsigned LongVectorCount = 0;
2367  unsigned MVTLen =
2368  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2369  while (NumVecElts > MVTLen) {
2370  NumVecElts /= 2;
2371  auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2372  CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
2373 
2374  ShuffleCost +=
2375  thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, std::nullopt,
2376  CostKind, NumVecElts, SubTy);
2377  MinMaxCost +=
2378  thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
2379  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2380  thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
2381  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2382  Ty = SubTy;
2383  ++LongVectorCount;
2384  }
2385 
2386  NumReduxLevels -= LongVectorCount;
2387 
2388  // The minimal length of the vector is limited by the real length of vector
2389  // operations performed on the current platform. That's why several final
2390  // reduction operations are performed on the vectors with the same
2391  // architecture-dependent length.
2392  ShuffleCost +=
2393  NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2394  std::nullopt, CostKind, 0, Ty);
2395  MinMaxCost +=
2396  NumReduxLevels *
2397  (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
2398  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2399  thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
2400  CmpInst::BAD_ICMP_PREDICATE, CostKind));
2401  // The last min/max should be in vector registers and we counted it above.
2402  // So just need a single extractelement.
2403  return ShuffleCost + MinMaxCost +
2404  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty,
2405  CostKind, 0, nullptr, nullptr);
2406  }
2407 
2408  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
2409  Type *ResTy, VectorType *Ty,
2410  std::optional<FastMathFlags> FMF,
2411  TTI::TargetCostKind CostKind) {
2412  // Without any native support, this is equivalent to the cost of
2413  // vecreduce.opcode(ext(Ty A)).
2414  VectorType *ExtTy = VectorType::get(ResTy, Ty);
2415  InstructionCost RedCost =
2416  thisT()->getArithmeticReductionCost(Opcode, ExtTy, FMF, CostKind);
2417  InstructionCost ExtCost = thisT()->getCastInstrCost(
2418  IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2419  TTI::CastContextHint::None, CostKind);
2420 
2421  return RedCost + ExtCost;
2422  }
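  // E.g. an unsigned i8-to-i32 add reduction over 16 lanes is costed as the
  // extend plus the widened reduction, i.e. the shape:
  //   %e = zext <16 x i8> %a to <16 x i32>
  //   %r = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %e)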
2423 
2424  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
2425  VectorType *Ty,
2426  TTI::TargetCostKind CostKind) {
2427  // Without any native support, this is equivalent to the cost of
2428  // vecreduce.add(mul(ext(Ty A), ext(Ty B))) or
2429  // vecreduce.add(mul(A, B)).
2430  VectorType *ExtTy = VectorType::get(ResTy, Ty);
2431  InstructionCost RedCost = thisT()->getArithmeticReductionCost(
2432  Instruction::Add, ExtTy, std::nullopt, CostKind);
2433  InstructionCost ExtCost = thisT()->getCastInstrCost(
2434  IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2435  TTI::CastContextHint::None, CostKind);
2436 
2437  InstructionCost MulCost =
2438  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2439 
2440  return RedCost + MulCost + 2 * ExtCost;
2441  }
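  // E.g. a dot-product style pattern with i8 inputs accumulated into i32,
  // i.e. the shape:
  //   %xe = sext <16 x i8> %x to <16 x i32>
  //   %ye = sext <16 x i8> %y to <16 x i32>
  //   %m  = mul <16 x i32> %xe, %ye
  //   %r  = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %m)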
2442 
2443  InstructionCost getVectorSplitCost() { return 1; }
2444 
2445  /// @}
2446 };
2447 
2448 /// Concrete BasicTTIImpl that can be used if no further customization
2449 /// is needed.
2450 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
2451  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
2452 
2453  friend BaseT;
2454 
2455  const TargetSubtargetInfo *ST;
2456  const TargetLoweringBase *TLI;
2457 
2458  const TargetSubtargetInfo *getST() const { return ST; }
2459  const TargetLoweringBase *getTLI() const { return TLI; }
2460 
2461 public:
2462  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
2463 };
2464 
2465 } // end namespace llvm
2466 
2467 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
llvm::ShuffleVectorInst::isZeroEltSplatMask
static bool isZeroEltSplatMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses all elements with the same value as the first element of exa...
Definition: Instructions.cpp:2344
llvm::MCSubtargetInfo::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: MCSubtargetInfo.cpp:361
llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition: ISDOpcodes.h:928
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::BasicTTIImplBase::getVectorSplitCost
InstructionCost getVectorSplitCost()
Definition: BasicTTIImpl.h:2443
llvm::TargetLoweringBase::isTruncStoreLegal
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
Definition: TargetLowering.h:1334
llvm::BasicTTIImplBase::getFPOpCost
InstructionCost getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:523
ValueTypes.h
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:466
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:445
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:894
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:473
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:217
llvm::ElementCount::isScalar
constexpr bool isScalar() const
Exactly one element.
Definition: TypeSize.h:302
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1148
llvm::TargetTransformInfoImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Definition: TargetTransformInfoImpl.h:160
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:245
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1377
llvm::BasicTTIImplBase::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:487
MathExtras.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:441
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition: TargetLowering.h:192
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:238
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout::getTypeStoreSizeInBits
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:487
llvm::TargetTransformInfoImplBase::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: TargetTransformInfoImpl.h:178
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:739
llvm::TargetLoweringBase::isCheapToSpeculateCtlz
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition: TargetLowering.h:618
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:197
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:114
llvm::BasicTTIImplBase::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:533
llvm::BasicTTIImplBase::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:396
llvm::BasicTTIImplBase::isAlwaysUniform
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:285
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1410
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:718
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:990
llvm::TargetLoweringBase::getTypeToTransformTo
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:999
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:217
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:279
llvm::BasicTTIImplBase::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:324
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:942
llvm::BasicTTIImplBase::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: BasicTTIImpl.h:297
llvm::Function
Definition: Function.h:59
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:547
llvm::ISD::PRE_DEC
@ PRE_DEC
Definition: ISDOpcodes.h:1377
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
llvm::details::FixedOrScalableQuantity::isScalable
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:166
llvm::BasicTTIImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1162
llvm::TargetLoweringBase::TypeScalarizeScalableVector
@ TypeScalarizeScalableVector
Definition: TargetLowering.h:217
llvm::BasicTTIImplBase::isTypeLegal
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:406
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:729
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(std::optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1275
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:341
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:31
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:534
llvm::BasicTTIImplBase::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt)
Definition: BasicTTIImpl.h:963
llvm::BasicTTIImplBase::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
Definition: BasicTTIImpl.h:271
ErrorHandling.h
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1054
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:631
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:152
llvm::BasicTTIImplBase::BasicTTIImplBase
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:262
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:452
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:740
llvm::LoopVectorizationLegality
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Definition: LoopVectorizationLegality.h:241
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:151
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:919
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:210
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:220
llvm::TargetLoweringBase::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetLowering.h:2617
APInt.h
llvm::BasicTTIImplBase::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:519
llvm::BasicTTIImplBase::~BasicTTIImplBase
virtual ~BasicTTIImplBase()=default
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:745
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:469
llvm::ElementCount::isVector
constexpr bool isVector() const
One or more elements.
Definition: TypeSize.h:306
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetLoweringBase::getTruncStoreAction
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
Definition: TargetLowering.h:1323
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
llvm::TargetLoweringBase::isIndexedLoadLegal
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
Definition: TargetLowering.h:1362
llvm::BasicTTIImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:371
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:528
llvm::ElementCount::getFixed
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:291
llvm::TargetLoweringBase::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I) const
Definition: TargetLowering.h:2754
llvm::BasicTTIImplBase::improveShuffleKindFromMask
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask) const
Definition: BasicTTIImpl.h:927
llvm::TargetLoweringBase::isSuitableForJumpTable
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
Definition: TargetLoweringBase.cpp:1640
llvm::BasicTTIImplBase::isIndexedStoreLegal
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:365
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::BasicTTIImplBase::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Definition: BasicTTIImpl.h:685
llvm::SmallPtrSet< const BasicBlock *, 4 >
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:921
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::MCSubtargetInfo::getCacheAssociativity
virtual std::optional< unsigned > getCacheAssociativity(unsigned Level) const
Return the cache associatvity for the given level of cache.
Definition: MCSubtargetInfo.cpp:344
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1408
llvm::TargetTransformInfoImplCRTPBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:960
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:515
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:288
llvm::ShuffleVectorInst::isReverseMask
static bool isReverseMask(ArrayRef< int > Mask)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition: Instructions.cpp:2326
llvm::BasicTTIImplBase::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: BasicTTIImpl.h:710
llvm::TargetLoweringBase::LegalizeAction
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
Definition: TargetLowering.h:196
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
llvm::TargetLoweringBase::TypeExpandInteger
@ TypeExpandInteger
Definition: TargetLowering.h:209
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:928
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:902
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
Definition: BasicTTIImpl.h:750
llvm::BasicTTIImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: BasicTTIImpl.h:375
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2185
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:1025
llvm::BasicTTIImplBase::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2333
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:911
llvm::BasicTTIImplBase::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:283
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:892
F
#define F(x, y, z)
Definition: MD5.cpp:55
TargetTransformInfoImpl.h
llvm::BasicTTIImplBase::getTreeReductionCost
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
Definition: BasicTTIImpl.h:2239
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1)
Definition: BasicTTIImpl.h:1219
llvm::Triple::isArch64Bit
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1469
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:162
llvm::BasicTTIImplBase::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Definition: BasicTTIImpl.h:315
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:153
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
MachineValueType.h
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:986
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::BasicTTIImplBase::getStoreMinimumVF
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
Definition: BasicTTIImpl.h:339
llvm::TargetTransformInfoImplBase::getDataLayout
const DataLayout & getDataLayout() const
Definition: TargetTransformInfoImpl.h:47
llvm::BasicTTIImplBase::simplifyDemandedVectorEltsIntrinsic
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: BasicTTIImpl.h:650
llvm::BasicTTIImplBase::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Definition: BasicTTIImpl.h:681
llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:423
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:929
llvm::TargetLoweringBase::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetLowering.h:2624
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
llvm::TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Definition: TargetTransformInfoImpl.h:224
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
llvm::TargetTransformInfoImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const
Definition: TargetTransformInfoImpl.h:166
TargetLowering.h
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:156
llvm::MCSubtargetInfo::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Return the maximum prefetch distance in terms of loop iterations.
Definition: MCSubtargetInfo.cpp:357
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:900
llvm::BasicTTIImplBase::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:813
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:541
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1181
llvm::MCSubtargetInfo::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Return the preferred prefetch distance in terms of instructions.
Definition: MCSubtargetInfo.cpp:353
llvm::TargetLoweringBase::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetLowering.h:1727
llvm::BasicTTIImplBase::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:538
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::TargetTransformInfoImplBase
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:34
llvm::BasicTTIImplBase::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB)
Definition: BasicTTIImpl.h:534
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
Constants.h
llvm::BasicTTIImplBase::getCacheSize
virtual std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:661
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:920
InlinePriorityMode::Cost
@ Cost
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:891
llvm::BasicTTIImplBase::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis()
Definition: BasicTTIImpl.h:281
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::Triple::isOSDarwin
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, or DriverKit).
Definition: Triple.h:518
llvm::BasicTTIImplBase::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: BasicTTIImpl.h:697
llvm::TargetLoweringBase::LegalizeKind
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Definition: TargetLowering.h:228
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1144
llvm::BasicTTIImplBase::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
Definition: BasicTTIImpl.h:2213
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
llvm::TargetLoweringBase::getTypeConversion
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
Definition: TargetLoweringBase.cpp:958
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2595
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::BitVector::count
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:155
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::BasicTTIImplBase::getFlatAddressSpace
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:287
InstrTypes.h
llvm::BasicTTIImplBase::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1238
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
SI
@ SI
Definition: SIInstrInfo.cpp:7993
llvm::BasicTTIImplBase::getMulAccReductionCost
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2424
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:927
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:195
llvm::BasicTTIImplBase::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:311
llvm::BasicTTIImplBase::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:1153
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:258
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::dwarf::Index
Index
Definition: Dwarf.h:550
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:373
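A short worked example of the floor semantics:

  unsigned A = llvm::Log2_32(32); // 5
  unsigned B = llvm::Log2_32(33); // also 5: the result rounds down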
llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:849
llvm::BasicTTIImplBase::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:847
llvm::BasicTTIImplBase::getRegUsageForType
unsigned getRegUsageForType(Type *Ty)
Definition: BasicTTIImpl.h:411
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:728
llvm::Instruction
Definition: Instruction.h:41
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1413
llvm::InterleavedAccessInfo
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:765
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:188
llvm::TargetLoweringBase::isLegalAddressingMode
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetLoweringBase.cpp:1917
llvm::TargetMachine::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: TargetMachine.h:331
llvm::BasicTTIImplBase::getCacheAssociativity
virtual std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:667
llvm::BasicTTIImplBase::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent()
Definition: BasicTTIImpl.h:536
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:926
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:925
llvm::TargetTransformInfoImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
Definition: TargetTransformInfoImpl.h:575
llvm::BasicTTIImplBase::getCacheLineSize
virtual unsigned getCacheLineSize() const
Definition: BasicTTIImpl.h:677
BitVector.h
llvm::TargetTransformInfoImplCRTPBase
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:950
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:75
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1377
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:686
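A minimal sketch of building a <4 x float> type; the LLVMContext Ctx here is an assumed, pre-existing context:

  auto *V4F32 =
      llvm::FixedVectorType::get(llvm::Type::getFloatTy(Ctx), 4); // <4 x float>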
llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition: TargetTransformInfo.h:929
llvm::TargetTransformInfoImplBase::simplifyDemandedUseBitsIntrinsic
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Definition: TargetTransformInfoImpl.h:184
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetTransformInfoImplBase::getCacheAssociativity
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: TargetTransformInfoImpl.h:473
llvm::BasicTTIImplBase::isSingleThreaded
bool isSingleThreaded() const
Definition: BasicTTIImpl.h:305
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:355
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1167
llvm::PartialUnrollingThreshold
cl::opt< unsigned > PartialUnrollingThreshold
llvm::BasicTTIImplBase::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Definition: BasicTTIImpl.h:422
llvm::BasicTTIImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:993
Type.h
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:119
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::TargetOptions::ThreadModel
ThreadModel::Model ThreadModel
ThreadModel - This flag specifies the type of threading model to assume for things like atomics.
Definition: TargetOptions.h:403
llvm::BasicTTIImplBase::shouldBuildLookupTables
bool shouldBuildLookupTables()
Definition: BasicTTIImpl.h:481
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1327
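A minimal sketch of the NaN behaviour (maxNum returns the non-NaN operand when exactly one input is NaN):

  llvm::APFloat A(1.0);
  llvm::APFloat B = llvm::APFloat::getNaN(llvm::APFloat::IEEEdouble());
  llvm::APFloat M = llvm::maxnum(A, B); // 1.0, not NaN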
LoopInfo.h
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index)
Definition: BasicTTIImpl.h:1225
llvm::TargetTransformInfoImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfoImpl.h:226
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:40
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1377
llvm::ARM_PROC::IE
@ IE
Definition: ARMBaseInfo.h:27
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:917
llvm::BasicTTIImplBase::emitGetActiveLaneMask
PredicationStyle emitGetActiveLaneMask()
Definition: BasicTTIImpl.h:633
llvm::TargetLoweringBase::isBeneficialToExpandPowI
bool isBeneficialToExpandPowI(int Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
Definition: TargetLowering.h:2273
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:893
llvm::TargetLoweringBase::isCheapToSpeculateCttz
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition: TargetLowering.h:613
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:955
BasicBlock.h
llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
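A minimal sketch contrasting the signed view with the unsigned one (ult):

  llvm::APInt A(4, 0b1000); // -8 as signed, 8 as unsigned
  llvm::APInt B(4, 0);
  bool S = A.slt(B); // true:  -8 < 0
  bool U = A.ult(B); // false:  8 is not < 0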
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::BasicTTIImplBase::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:328
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::divideCeil
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:508
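A short worked example:

  uint64_t Chunks = llvm::divideCeil(10, 4); // 3, since ceil(10 / 4) = 3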
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:898
llvm::TargetLoweringBase::getNumRegisters
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, std::optional< MVT > RegisterVT=std::nullopt) const
Return the number of registers that this ValueType will eventually require.
Definition: TargetLowering.h:1583
llvm::TargetMachine::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: TargetMachine.h:318
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:920
llvm::BasicTTIImplBase::haveFastSqrt
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:512
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1414
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
uint64_t
llvm::Type::getWithNewBitWidth
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitWidth, whilst keeping the old numb...
Definition: DerivedTypes.h:722
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:402
llvm::BasicTTIImplBase::getVScaleForTuning
std::optional< unsigned > getVScaleForTuning() const
Definition: BasicTTIImpl.h:715
llvm::APIntOps::ScaleBitMask
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to the new bit width NewBitWidth.
Definition: APInt.cpp:2960
llvm::TargetLoweringBase::isLoadExtLegal
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Definition: TargetLowering.h:1309
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:936
llvm::TargetLoweringBase::getLoadExtAction
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Definition: TargetLowering.h:1297
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:914
llvm::TypeSize::getFixed
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:322
llvm::TargetLoweringBase::isSuitableForBitTests
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
Definition: TargetLowering.h:1259
llvm::BasicTTIImplBase::isIndexedLoadLegal
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:359
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::TargetLoweringBase::isOperationLegalOrPromote
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
Definition: TargetLowering.h:1195
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:31
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:416
llvm::EVT::getEVT
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:595
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:145
llvm::TargetTransformInfoImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:531
llvm::BasicTTIImplBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:416
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::BasicTTIImplBase
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:79
ArrayRef.h
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:536
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1408
llvm::TargetMachine::Options
TargetOptions Options
Definition: TargetMachine.h:119
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
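A minimal sketch; Ctx is an assumed, pre-existing LLVMContext. Widths with no MVT equivalent yield an extended (non-simple) EVT:

  llvm::EVT VT = llvm::EVT::getIntegerVT(Ctx, 48); // i48
  bool Simple = VT.isSimple(); // false: 48 bits has no matching MVT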
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:79
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:124
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:62
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:751
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
llvm::BasicTTIImplBase::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1332
llvm::BasicTTIImplBase::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1272
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:904
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:862
llvm::BasicTTIImpl::BasicTTIImpl
BasicTTIImpl(const TargetMachine *TM, const Function &F)
Definition: BasicTargetTransformInfo.cpp:32
llvm::ISD::POST_DEC
@ POST_DEC
Definition: ISDOpcodes.h:1377
llvm::FloatStyle::Exponent
@ Exponent
llvm::TargetTransformInfoImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
Definition: TargetTransformInfoImpl.h:496
llvm::TargetSubtargetInfo::useAA
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
Definition: TargetSubtargetInfo.cpp:56
llvm::MCSubtargetInfo::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Return the minimum stride necessary to trigger software prefetching.
Definition: MCSubtargetInfo.cpp:365
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
getType
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
Definition: M68kELFObjectWriter.cpp:48
llvm::BasicTTIImplBase::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Definition: BasicTTIImpl.h:693
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2175
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:532
TargetOptions.h
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:956
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::LoopInfo
Definition: LoopInfo.h:1108
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1377
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
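A minimal sketch over a SmallVector:

  llvm::SmallVector<int, 4> Vals = {1, 2, 3};
  bool HasEven = llvm::any_of(Vals, [](int V) { return V % 2 == 0; }); // true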
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetMachine::getPredicatedAddrSpace
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Definition: TargetMachine.h:341
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:743
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::BasicTTIImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
Definition: BasicTTIImpl.h:625
llvm::TargetTransformInfoImplBase::simplifyDemandedVectorEltsIntrinsic
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: TargetTransformInfoImpl.h:190
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2593
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
TargetSubtargetInfo.h
llvm::BasicTTIImplBase::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction's unique non-constant operands.
Definition: BasicTTIImpl.h:766
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:921
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1785
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:255
llvm::TargetTransformInfoImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:584
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:922
llvm::BasicTTIImplBase::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:610
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1411
llvm::VectorType::getHalfElementsVectorType
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
Definition: DerivedTypes.h:493
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:433
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:912
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:897
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:62
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1408
llvm::BasicTTIImplBase::useAA
bool useAA() const
Definition: BasicTTIImpl.h:404
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::RISCVISD::LA
@ LA
Definition: RISCVISelLowering.h:337
llvm::TargetLoweringBase::TypeLegal
@ TypeLegal
Definition: TargetLowering.h:207
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:720
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1412
llvm::TargetLoweringBase::isFreeAddrSpaceCast
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
Definition: TargetLoweringBase.cpp:946
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:912
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:165
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:350
llvm::BasicTTIImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: BasicTTIImpl.h:618
llvm::details::FixedOrScalableQuantity< TypeSize, uint64_t >::isKnownLT
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition: TypeSize.h:198
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2190
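A minimal sketch contrasting umax with its signed counterpart smax (see the smax entry below):

  llvm::APInt A(4, 0b1000); // 8 unsigned, -8 signed
  llvm::APInt B(4, 0b0111); // 7 either way
  const llvm::APInt &U = llvm::APIntOps::umax(A, B); // A: 8 > 7 unsigned
  const llvm::APInt &S = llvm::APIntOps::smax(A, B); // B: 7 > -8 signed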
llvm::ShuffleVectorInst::isSpliceMask
static bool isSpliceMask(ArrayRef< int > Mask, int &Index)
Return true if this shuffle mask is a splice mask, concatenating the two inputs together and then ext...
Definition: Instructions.cpp:2403
Constant.h
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2594
llvm::BasicTTIImplBase::instCombineIntrinsic
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
Definition: BasicTTIImpl.h:637
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1316
llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition: TargetLowering.h:2966
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:943
llvm::BasicTTIImpl
Concrete BasicTTIImpl that can be used if no further customization is needed.
Definition: BasicTTIImpl.h:2450
llvm::KnownBits
Definition: KnownBits.h:23
llvm::BasicTTIImplBase::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute the cost of the given call instruction.
Definition: BasicTTIImpl.h:2202
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2596
llvm::TargetLoweringBase::isIndexedStoreLegal
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
Definition: TargetLowering.h:1376
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:483
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:915
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:923
llvm::ShuffleVectorInst::isSelectMask
static bool isSelectMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition: Instructions.cpp:2356
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:228
llvm::TargetTransformInfo::getOperandInfo
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:736
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:914
ISDOpcodes.h
llvm::TypeSize
Definition: TypeSize.h:314
Casting.h
llvm::BasicTTIImplBase::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1316
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:201
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1284
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:105
llvm::BasicTTIImplBase::isProfitableToHoist
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:400
llvm::BasicTTIImplBase::isLegalAddImmediate
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:320
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:234
llvm::Function::isTargetIntrinsic
bool isTargetIntrinsic() const
isTargetIntrinsic - Returns true if this function is an intrinsic and the intrinsic is specific to a ...
Definition: Function.cpp:831
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:794
llvm::BasicTTIImplBase::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Definition: BasicTTIImpl.h:292
llvm::ThreadModel::Single
@ Single
Definition: TargetOptions.h:57
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:186
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:966
llvm::PredicationStyle
PredicationStyle
Definition: TargetTransformInfo.h:165
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1209
llvm::TargetTransformInfoImplBase::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:38
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:96
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::BasicTTIImplBase::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
Definition: BasicTTIImpl.h:2343
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:31
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: TargetLowering.h:2734
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
Definition: BasicTTIImpl.h:795
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:689
llvm::BasicTTIImplBase::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1323
Instructions.h
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:150
llvm::TargetLoweringBase::areJTsAllowed
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
Definition: TargetLowering.h:1227
SmallVector.h
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:969
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:199
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:741
N
#define N
llvm::BasicTTIImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1476
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:704
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::BasicTTIImplBase::getNumberOfParts
unsigned getNumberOfParts(Type *Tp)
Definition: BasicTTIImpl.h:2208
TargetTransformInfo.h
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null...
Definition: TargetLowering.h:2592
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2823
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1415
llvm::SmallVectorImpl< int >
llvm::BasicTTIImplBase::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:301
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1184
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
DerivedTypes.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:313
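A minimal sketch; Ctx is an assumed, pre-existing LLVMContext. Arbitrary bit widths are allowed:

  llvm::IntegerType *I24 = llvm::IntegerType::get(Ctx, 24); // i24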
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:438
llvm::BasicTTIImplBase::getTypeBasedIntrinsicInstrCost
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:1698
llvm::BasicTTIImplBase::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:383
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:244
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3283
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::TargetTransformInfoImplBase::emitGetActiveLaneMask
PredicationStyle emitGetActiveLaneMask() const
Definition: TargetTransformInfoImpl.h:174
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1494
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:158
llvm::BasicTTIImplBase::simplifyDemandedUseBitsIntrinsic
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
Definition: BasicTTIImpl.h:643
llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition: TargetLowering.h:213
llvm::TargetTransformInfoImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:646
llvm::TargetTransformInfo::OperandValueInfo::isConstant
bool isConstant() const
Definition: TargetTransformInfo.h:932
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:924
llvm::BasicTTIImplBase::getOrderedReductionCost
InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate the cost of performing strict (in-order) reductions, which involves doing a sequence...
Definition: BasicTTIImpl.h:2316
Value.h
llvm::BasicTTIImplBase::getMaxVScale
std::optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:714
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1302
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:132
llvm::BasicTTIImplBase::hasBranchDivergence
bool hasBranchDivergence()
Definition: BasicTTIImpl.h:279
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:918
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:370
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::BasicTTIImplBase::getExtendedReductionCost
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2408
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:218
llvm::BasicTTIImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I)
Definition: BasicTTIImpl.h:379
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:155
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:670
llvm::ShuffleVectorInst::isTransposeMask
static bool isTransposeMask(ArrayRef< int > Mask)
Return true if this shuffle mask is a transpose mask.
Definition: Instructions.cpp:2369
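A minimal sketch: the mask <0, 4, 2, 6> selects a0, b0, a2, b2 from two 4-element inputs, i.e. one row of a 2x4 transpose:

  int Mask[] = {0, 4, 2, 6};
  bool IsTranspose = llvm::ShuffleVectorInst::isTransposeMask(Mask); // true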
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:899
llvm::MCSubtargetInfo::getCacheLineSize
virtual std::optional< unsigned > getCacheLineSize(unsigned Level) const
Return the target cache line size in bytes at a given level.
Definition: MCSubtargetInfo.cpp:349
llvm::Triple::aarch64
@ aarch64
Definition: Triple.h:51
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2180
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::MCSubtargetInfo::shouldPrefetchAddressSpace
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: MCSubtargetInfo.cpp:372
llvm::codeview::PublicSymFlags::Function
@ Function
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition: TargetLowering.h:986
llvm::BasicTTIImplBase::shouldPrefetchAddressSpace
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: BasicTTIImpl.h:701
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
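A minimal sketch of the common visited-set idiom; V is an assumed, pre-existing Value pointer:

  llvm::SmallPtrSet<const llvm::Value *, 4> Visited;
  bool FirstTime = Visited.insert(V).second; // true only on first insertion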
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:39
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722