//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
//
//===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
18 
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Operator.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/Support/Casting.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <limits>
54 #include <utility>

namespace llvm {

class Function;
class GlobalValue;
class LLVMContext;
class ScalarEvolution;
class SCEV;
class TargetMachine;

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
  using TTI = TargetTransformInfo;

  /// Helper function to access this as a T.
  T *thisT() { return static_cast<T *>(this); }

  /// Estimate a cost of Broadcast as an extract and sequence of insert
  /// operations.
  InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) {
    InstructionCost Cost = 0;
    // Broadcast cost is equal to the cost of extracting the zeroth element
    // plus the cost of inserting it into every element of the result vector.
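    // For example, broadcasting a <4 x float> is modeled as one
    // extractelement plus four insertelement operations.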
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);

    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of shuffle as a sequence of extract and insert
  /// operations.
  InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy) {
    InstructionCost Cost = 0;
    // Shuffle cost is equal to the cost of extracting elements from the
    // arguments plus the cost of inserting them into the result vector.

    // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
    // from index 0 of the first vector, index 1 of the second vector, index 2
    // of the first vector, and finally index 3 of the second vector, and
    // insert them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector extraction as a sequence of extract and
  /// insert operations.
  InstructionCost getExtractSubvectorOverhead(VectorType *VTy, int Index,
                                              FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only extract subvectors from vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_ExtractSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector extraction cost is equal to the cost of extracting elements
    // from the source type plus the cost of inserting them into the result
    // vector type.
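    // For example, extracting a <2 x float> subvector at index 2 of a
    // <4 x float> source costs two extracts (indices 2 and 3) plus two
    // inserts into the result.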
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                          i + Index);
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector insertion as a sequence of extract and
  /// insert operations.
  InstructionCost getInsertSubvectorOverhead(VectorType *VTy, int Index,
                                             FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only insert subvectors into vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_InsertSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector insertion cost is equal to the cost of extracting elements
    // from the subvector type plus the cost of inserting them into the
    // result vector type.
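    // For example, inserting a <2 x float> subvector at index 2 of a
    // <4 x float> destination costs two extracts from the subvector plus
    // two inserts at indices 2 and 3 of the result.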
    for (int i = 0; i != NumSubElts; ++i) {
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                          i + Index);
    }
    return Cost;
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }

  InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                              Align Alignment,
                                              bool VariableMask,
                                              bool IsGatherScatter,
                                              TTI::TargetCostKind CostKind) {
    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(DataTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(DataTy);
    // Assume the target does not have support for gather/scatter operations
    // and provide a rough estimate.
    //
    // First, compute the cost of the individual memory operations.
    InstructionCost AddrExtractCost =
        IsGatherScatter
            ? getVectorInstrCost(Instruction::ExtractElement,
                                 FixedVectorType::get(
                                     PointerType::get(VT->getElementType(), 0),
                                     VT->getNumElements()),
                                 -1)
            : 0;
    InstructionCost LoadCost =
        VT->getNumElements() *
        (AddrExtractCost +
         getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));

    // Next, compute the cost of packing the result in a vector.
    InstructionCost PackingCost = getScalarizationOverhead(
        VT, Opcode != Instruction::Store, Opcode == Instruction::Store);

    InstructionCost ConditionalCost = 0;
    if (VariableMask) {
      // Compute the cost of conditionally executing the memory operations with
      // variable masks. This includes extracting the individual conditions,
      // the branches, and the PHIs needed to combine the results.
      // NOTE: Estimating the cost of conditionally executing the memory
      // operations accurately is quite difficult and the current solution
      // provides a very rough estimate only.
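      // Roughly: NumElts * (extract of the condition bit + branch + PHI) on
      // top of the per-element memory cost computed above.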
      ConditionalCost =
          VT->getNumElements() *
          (getVectorInstrCost(
               Instruction::ExtractElement,
               FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
                                    VT->getNumElements()),
               -1) +
           getCFInstrCost(Instruction::Br, CostKind) +
           getCFInstrCost(Instruction::PHI, CostKind));
    }

    return LoadCost + PackingCost + ConditionalCost;
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
  virtual ~BasicTTIImplBase() = default;

  using TargetTransformInfoImplBase::DL;

public:
  /// \name Scalar TTI Implementations
  /// @{
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }

  bool hasBranchDivergence() { return false; }

  bool useGPUDivergenceAnalysis() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
  }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
    auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
      auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);
      EVT VT = getTLI()->getValueType(DL, SrcTy);
      if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
          getTLI()->isOperationCustom(ISD::STORE, VT))
        return true;

      EVT ValVT =
          getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2));
      EVT LegalizedVT =
          getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT);
      return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT);
    };
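    // Halve VF while the next-narrower store remains legal (or custom), so
    // the returned value is the smallest supported width, but never below 2.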
    while (VF > 2 && IsSupportedByTarget(VF))
      VF /= 2;
    return VF;
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  bool isNumRegsMajorCostOfLSR() {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }

  bool isProfitableLSRChainElement(Instruction *I) {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    if (getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace))
      return 0;
    return -1;
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  unsigned getRegUsageForType(Type *Ty) {
    EVT ETy = getTLI()->getValueType(DL, Ty);
    return getTLI()->getNumRegisters(Ty->getContext(), ETy);
  }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// number found in lowering. This function ignores switches that are
    /// lowered with a mix of jump table / bit test / BTree. It was initially
    /// intended to be used when estimating the cost of a switch in the inline
    /// cost heuristic, but it's a generic cost model usable in other places
    /// (e.g., in loop unrolling).
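    // For example, a switch whose cases form a dense range such as 0..15 is
    // counted as a single jump-table cluster below, whereas widely scattered
    // cases fall through and are counted individually.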
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if neither a jump table nor a bit test is allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test.
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether the range of clusters is dense enough for a jump table.
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool shouldBuildRelLookupTables() const {
    const TargetMachine &TM = getTLI()->getTargetMachine();
    // If non-PIC mode, do not generate a relative lookup table.
    if (!TM.isPositionIndependent())
      return false;

    // Relative lookup table entries consist of 32-bit offsets.
    // Do not generate relative lookup tables for large code models
    // on 64-bit architectures, where 32-bit offsets might not be enough.
    if (TM.getCodeModel() == CodeModel::Medium ||
        TM.getCodeModel() == CodeModel::Large)
      return false;

    Triple TargetTriple = TM.getTargetTriple();
    if (!TargetTriple.isArch64Bit())
      return false;

    // TODO: Triggers issues on aarch64 on darwin, so temporarily disable it
    // there.
    if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin())
      return false;

    return true;
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }

  InstructionCost getFPOpCost(Type *Ty) {
    // Check whether FADD is available, as a proxy for floating-point in
    // general.
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
      return TargetTransformInfo::TCC_Basic;
    return TargetTransformInfo::TCC_Expensive;
  }

  unsigned getInliningThresholdMultiplier() { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }

  int getInlinerVectorBonusPercent() { return 150; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (BasicBlock *BB : L->blocks()) {
      for (Instruction &I : *BB) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
          if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
            if (!thisT()->isLoweredToCall(F))
              continue;
          }

          if (ORE) {
            ORE->emit([&]() {
              return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
                                        L->getHeader())
                     << "advising against unrolling the loop because it "
                        "contains a "
                     << ore::NV("Call", &I);
            });
          }
          return;
        }
      }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using the trip count upper bound to unroll loops.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set the number of instructions optimized away when a "back edge"
    // becomes a "fall through" to the default value of 2.
    UP.BEInsns = 2;
  }

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) {
    PP.PeelCount = 0;
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   LoopVectorizationLegality *LVL,
                                   InterleavedAccessInfo *IAI) {
    return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LVL, IAI);
  }

  PredicationStyle emitGetActiveLaneMask() {
    return BaseT::emitGetActiveLaneMask();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

  Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedMask,
                                                     KnownBits &Known,
                                                     bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) {
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }

  virtual Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return Optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    Optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
      return TargetResult;

    return BaseT::getCacheAssociativity(Level);
  }

  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }

  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }

  virtual bool enableWritePrefetching() const {
    return getST()->enableWritePrefetching();
  }

  virtual bool shouldPrefetchAddressSpace(unsigned AS) const {
    return getST()->shouldPrefetchAddressSpace(AS);
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  Optional<unsigned> getMaxVScale() const { return None; }
  Optional<unsigned> getVScaleForTuning() const { return None; }

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
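  /// For example, on a <4 x i32> with DemandedElts = 0b0101 and Insert set,
  /// only the two demanded lanes contribute insertelement costs.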
  InstructionCost getScalarizationOverhead(VectorType *InTy,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) {
    /// FIXME: a bitfield is not a reasonable abstraction for talking about
    /// which elements are needed from a scalable vector.
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           "Vector size mismatch");

    InstructionCost Cost = 0;

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }

  /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
  InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
                                           bool Extract) {
    if (isa<ScalableVectorType>(InTy))
      return InstructionCost::getInvalid();
    auto *Ty = cast<FixedVectorType>(InTy);

    APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  }

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                   ArrayRef<Type *> Tys) {
    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");

    InstructionCost Cost = 0;
    SmallPtrSet<const Value *, 4> UniqueOperands;
    for (int I = 0, E = Args.size(); I != E; I++) {
      // Disregard things like metadata arguments.
      const Value *A = Args[I];
      Type *Ty = Tys[I];
      if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
          !Ty->isPtrOrPtrVectorTy())
        continue;

      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
          Cost += getScalarizationOverhead(VecTy, false, true);
      }
    }

    return Cost;
  }

  /// Estimate the overhead of scalarizing the inputs and outputs of an
  /// instruction, with return type RetTy and arguments Args of type Tys. If
  /// Args are unknown (empty), then the cost associated with one argument is
  /// added as a heuristic.
  InstructionCost getScalarizationOverhead(VectorType *RetTy,
                                           ArrayRef<const Value *> Args,
                                           ArrayRef<Type *> Tys) {
    InstructionCost Cost = getScalarizationOverhead(RetTy, true, false);
    if (!Args.empty())
      Cost += getOperandsScalarizationOverhead(Args, Tys);
    else
      // When no information on arguments is provided, we add the cost
      // associated with one argument as a heuristic.
      Cost += getScalarizationOverhead(RetTy, false, true);

    return Cost;
  }

  /// Estimate the cost of type-legalization and the legalized type.
  std::pair<InstructionCost, MVT> getTypeLegalizationCost(Type *Ty) const {
    LLVMContext &C = Ty->getContext();
    EVT MTy = getTLI()->getValueType(DL, Ty);

    InstructionCost Cost = 1;
    // We keep legalizing the type until we find a legal kind. We assume that
    // the only operation that costs anything is the split. After splitting
    // we need to handle two types.
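    // For example, on a target whose widest legal vector is 128 bits,
    // <8 x i32> (256 bits) splits once into two <4 x i32> halves, giving
    // Cost = 2 and a legalized type of v4i32.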
    while (true) {
      TargetLoweringBase::LegalizeKind LK = getTLI()->getTypeConversion(C, MTy);

      if (LK.first == TargetLoweringBase::TypeScalarizeScalableVector) {
        // Ensure we return a sensible simple VT here, since many callers of
        // this function require it.
        MVT VT = MTy.isSimple() ? MTy.getSimpleVT() : MVT::i64;
        return std::make_pair(InstructionCost::getInvalid(), VT);
      }

      if (LK.first == TargetLoweringBase::TypeLegal)
        return std::make_pair(Cost, MTy.getSimpleVT());

      if (LK.first == TargetLoweringBase::TypeSplitVector ||
          LK.first == TargetLoweringBase::TypeExpandInteger)
        Cost *= 2;

      // Do not loop with f128 type.
      if (MTy == LK.second)
        return std::make_pair(Cost, MTy.getSimpleVT());

      // Keep legalizing the type.
      MTy = LK.second;
    }
  }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueInfo Opd1Info = {TTI::OK_AnyValue, TTI::OP_None},
      TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle more cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                           Opd1Info, Opd2Info,
                                           Args, CxtI);

    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating-point arithmetic operations cost twice as much as
    // integer operations.
    InstructionCost OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is twice
      // as expensive.
      return LT.first * 2 * OpCost;
    }

    // An 'Expand' of URem and SRem is special because it may default
    // to expanding the operation into a sequence of sub-operations,
    // i.e. X % Y -> X-(X/Y)*Y.
    if (ISD == ISD::UREM || ISD == ISD::SREM) {
      bool IsSigned = ISD == ISD::SREM;
      if (TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
                                        LT.second) ||
          TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV,
                                        LT.second)) {
        unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
        InstructionCost DivCost = thisT()->getArithmeticInstrCost(
            DivOpc, Ty, CostKind, Opd1Info, Opd2Info);
        InstructionCost MulCost =
            thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
        InstructionCost SubCost =
            thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
        return DivCost + MulCost + SubCost;
      }
    }

    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    // Else, assume that we need to scalarize this op.
    // TODO: If one of the types gets legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      InstructionCost Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
          Args, CxtI);
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      SmallVector<Type *> Tys(Args.size(), Ty);
      return getScalarizationOverhead(VTy, Args, Tys) +
             VTy->getNumElements() * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }
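
  /// Attempt to refine a generic shuffle kind from the actual mask, e.g.
  /// recognizing that a two-source permute mask really encodes a select,
  /// transpose, or splice, so the cheaper kind can be costed instead.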
  TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
                                              ArrayRef<int> Mask) const {
    int Limit = Mask.size() * 2;
    if (Mask.empty() ||
        // Extra check required by isSingleSourceMaskImpl function (called by
        // ShuffleVectorInst::isSingleSourceMask).
        any_of(Mask, [Limit](int I) { return I >= Limit; }))
      return Kind;
    int Index;
    switch (Kind) {
    case TTI::SK_PermuteSingleSrc:
      if (ShuffleVectorInst::isReverseMask(Mask))
        return TTI::SK_Reverse;
      if (ShuffleVectorInst::isZeroEltSplatMask(Mask))
        return TTI::SK_Broadcast;
      break;
    case TTI::SK_PermuteTwoSrc:
      if (ShuffleVectorInst::isSelectMask(Mask))
        return TTI::SK_Select;
      if (ShuffleVectorInst::isTransposeMask(Mask))
        return TTI::SK_Transpose;
      if (ShuffleVectorInst::isSpliceMask(Mask, Index))
        return TTI::SK_Splice;
      break;
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Broadcast:
    case TTI::SK_Transpose:
    case TTI::SK_InsertSubvector:
    case TTI::SK_ExtractSubvector:
    case TTI::SK_Splice:
      break;
    }
    return Kind;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask,
                                 TTI::TargetCostKind CostKind, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = None) {
    switch (improveShuffleKindFromMask(Kind, Mask)) {
    case TTI::SK_Broadcast:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getBroadcastShuffleOverhead(FVT);
      return InstructionCost::getInvalid();
    case TTI::SK_Select:
    case TTI::SK_Splice:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getPermuteShuffleOverhead(FVT);
      return InstructionCost::getInvalid();
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, Index,
                                        cast<FixedVectorType>(SubTp));
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr) {
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT = getTypeLegalizationCost(Src);
    std::pair<InstructionCost, MVT> DstLT = getTypeLegalizationCost(Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      [[fallthrough]];
    case Instruction::BitCast:
      // Bitcasts between types that are legalized to the same type are free;
      // assume int to/from ptr of the same size is also free.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      [[fallthrough]];
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;

      // If this is a zext/sext of a load, return 0 if the corresponding
      // extending load exists on the target and the result type is legal.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
            ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
        if (DstLT.first == SrcLT.first &&
            TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
          return 0;
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are legalizing by splitting, query the concrete TTI for the cost
      // of casting the original vector twice. We also need to factor in the
      // cost of the split itself. Count that as 1, to be consistent with
      // getTypeLegalizationCost().
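      // For example, if both a v8i32 source and a v8i64 destination must be
      // split, the split itself is counted as free and the result is twice
      // the cost of the corresponding half-width cast.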
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // Scalarization cost is Invalid, can't assume any num elements.
      if (isa<ScalableVectorType>(DstVTy))
        return InstructionCost::getInvalid();

      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcasts between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
    }

    llvm_unreachable("Unhandled cast");
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index) {
    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       Index) +
           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                     TTI::CastContextHint::None,
                                     TTI::TCK_RecipThroughput);
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) {
    return BaseT::getCFInstrCost(Opcode, CostKind, I);
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle other cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       I);

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the cast is scalarized.
    // TODO: If one of the types gets legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      if (isa<ScalableVectorType>(ValTy))
        return InstructionCost::getInvalid();

      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      InstructionCost Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) {
    return getRegUsageForType(Val->getScalarType());
  }

  InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
                                     unsigned Index) {
    return thisT()->getVectorInstrCost(I.getOpcode(), Val, Index);
  }
1190 
1191  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1192  int VF,
1193  const APInt &DemandedDstElts,
1195  assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
1196  "Unexpected size of DemandedDstElts.");
1197 
1199 
1200  auto *SrcVT = FixedVectorType::get(EltTy, VF);
1201  auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor);
1202 
1203  // The Mask shuffling cost is extract all the elements of the Mask
1204  // and insert each of them Factor times into the wide vector:
1205  //
1206  // E.g. an interleaved group with factor 3:
1207  // %mask = icmp ult <8 x i32> %vec1, %vec2
1208  // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1209  // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1210  // The cost is estimated as extract all mask elements from the <8xi1> mask
1211  // vector and insert them factor times into the <24xi1> shuffled mask
1212  // vector.
1213  APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
1214  Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
1215  /*Insert*/ false,
1216  /*Extract*/ true);
1217  Cost +=
1218  thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
1219  /*Insert*/ true, /*Extract*/ false);
1220 
1221  return Cost;
1222  }

  InstructionCost
  getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                  unsigned AddressSpace, TTI::TargetCostKind CostKind,
                  TTI::OperandValueInfo OpInfo = {TTI::OK_AnyValue, TTI::OP_None},
                  const Instruction *I = nullptr) {
    assert(!Src->isVoidTy() && "Invalid type");
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Src);

    // Assume that all loads of legal types cost 1.
    InstructionCost Cost = LT.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    const DataLayout &DL = this->getDataLayout();
    if (Src->isVectorTy() &&
        // In practice it's not currently possible to have a change in lane
        // length for extending loads or truncating stores, so both types
        // should have the same scalable property.
        TypeSize::isKnownLT(DL.getTypeStoreSizeInBits(Src),
                            LT.second.getSizeInBits())) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src);
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(cast<VectorType>(Src),
                                         Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
                                       CostKind);
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
                                       true, CostKind);
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {

    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(VecTy))
      return InstructionCost::getInvalid();

    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);

    // First, the cost of the load/store operation.
    InstructionCost Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalize the vector type, and get the legalized and unlegalized type
    // sizes.
    MVT VecTyLT = getTypeLegalizationCost(VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

    // Scale the cost of the memory operation by the fraction of legalized
    // instructions that will actually be used. We shouldn't account for the
    // cost of dead instructions since they will be removed.
    //
    // E.g., an interleaved load of factor 8:
    //   %vec = load <16 x i64>, <16 x i64>* %ptr
    //   %v0 = shufflevector %vec, undef, <0, 8>
    //
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
    // type). The other loads are unused.
    //
    // TODO: Note that legalization can turn masked loads/stores into unmasked
    // (legalized) loads/stores. This can be reflected in the cost.
    if (Cost.isValid() && VecTySize > VecTyLTSize) {
      // The number of loads of a legal type it will take to represent a load
      // of the unlegalized vector type.
      unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);

      // The number of elements of the unlegalized type that correspond to a
      // single legal instruction.
      unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);

      // Determine which legal instructions will be used.
      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

      // Scale the cost of the load by the fraction of legal instructions that
      // will be used.
      Cost = divideCeil(UsedInsts.count() * *Cost.getValue(), NumLegalInsts);
    }

    // Next, add the cost of the interleave operation.
    assert(Indices.size() <= Factor &&
           "Interleaved memory op has too many members");

    const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
    const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);

    APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
    for (unsigned Index : Indices) {
      assert(Index < Factor && "Invalid index for interleaved memory op");
      for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
        DemandedLoadStoreElts.setBit(Index + Elm * Factor);
    }

    if (Opcode == Instruction::Load) {
      // The interleave cost is similar to extracting sub vectors' elements
      // from the wide vector, and inserting them into sub vectors.
      //
      // E.g. an interleaved load of factor 2 (with one member of index 0):
      //   %vec = load <8 x i32>, <8 x i32>* %ptr
      //   %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
      // The cost is estimated as extracting elements at 0, 2, 4, 6 from the
      // <8 x i32> vector and inserting them into a <4 x i32> vector.
      InstructionCost InsSubCost =
          thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
                                            /*Insert*/ true, /*Extract*/ false);
      Cost += Indices.size() * InsSubCost;
      Cost +=
          thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                            /*Insert*/ false, /*Extract*/ true);
    } else {
      // The interleave cost is to extract elements from sub vectors, and
      // insert them into the wide vector.
      //
      // E.g. an interleaved store of factor 3 with 2 members at indices 0,1
      // (using VF=4):
      //   %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
      //   %gaps.mask = <true, true, false, true, true, false,
      //                 true, true, false, true, true, false>
      //   call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
      //        i32 Align, <12 x i1> %gaps.mask
      // The cost is estimated as extracting all elements (of actual members,
      // excluding gaps) from both <4 x i32> vectors and inserting into the
      // <12 x i32> vector.
      InstructionCost ExtSubCost =
          thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
                                            /*Insert*/ false, /*Extract*/ true);
      Cost += ExtSubCost * Indices.size();
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                                /*Insert*/ true,
                                                /*Extract*/ false);
    }

    if (!UseMaskForCond)
      return Cost;

    Type *I8Type = Type::getInt8Ty(VT->getContext());

    Cost += thisT()->getReplicationShuffleCost(
        I8Type, Factor, NumSubElts,
        UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
        CostKind);

    // The Gaps mask is invariant and created outside the loop; therefore the
    // cost of creating it is not accounted for here. However, if we have both
    // a MaskForGaps and some other mask that guards the execution of the
    // memory access, we need to account for the cost of And-ing the two masks
    // inside the loop.
    if (UseMaskForGaps) {
      auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                              CostKind);
    }

    return Cost;
  }

  /// Get intrinsic cost based on arguments.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) {
    // Check for generically free intrinsics.
    if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
      return 0;

    // Assume that target intrinsics are cheap.
    Intrinsic::ID IID = ICA.getID();
    if (Function::isTargetIntrinsic(IID))
      return TargetTransformInfo::TCC_Basic;

    if (ICA.isTypeBasedOnly())
      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

    Type *RetTy = ICA.getReturnType();

    ElementCount RetVF =
        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                             : ElementCount::getFixed(1));
    const IntrinsicInst *I = ICA.getInst();
    const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
    FastMathFlags FMF = ICA.getFlags();
    switch (IID) {
    default:
      break;

    case Intrinsic::powi:
      if (auto *RHSC = dyn_cast<ConstantInt>(Args[1])) {
        bool ShouldOptForSize = I->getParent()->getParent()->hasOptSize();
        if (getTLI()->isBeneficialToExpandPowI(RHSC->getSExtValue(),
                                               ShouldOptForSize)) {
          // The cost is modeled on the expansion performed by ExpandPowI in
          // SelectionDAGBuilder.
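          // E.g. powi(x, 11): the exponent is 0b1011 (ActiveBits = 4,
          // PopCount = 3), so the expansion needs 4 + 3 - 2 = 5 fmuls:
          // three squarings plus two multiplies into the result.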
          APInt Exponent = RHSC->getValue().abs();
          unsigned ActiveBits = Exponent.getActiveBits();
          unsigned PopCount = Exponent.countPopulation();
          InstructionCost Cost = (ActiveBits + PopCount - 2) *
                                 thisT()->getArithmeticInstrCost(
                                     Instruction::FMul, RetTy, CostKind);
          if (RHSC->getSExtValue() < 0)
            Cost += thisT()->getArithmeticInstrCost(Instruction::FDiv, RetTy,
                                                    CostKind);
          return Cost;
        }
      }
      break;
    case Intrinsic::cttz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::ctlz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz(RetTy))
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());

    case Intrinsic::masked_scatter: {
      const Value *Mask = Args[3];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Store,
                                             ICA.getArgTypes()[0], Args[1],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_stepvector: {
      if (isa<ScalableVectorType>(RetTy))
        return getTypeLegalizationCost(RetTy).first;
      // The cost of materialising a constant integer vector.
      return TargetTransformInfo::TCC_Basic;
    }
    case Intrinsic::vector_extract: {
      // FIXME: Handle case where a scalable vector is extracted from a scalable
      // vector
      if (isa<ScalableVectorType>(RetTy))
        return InstructionCost::getInvalid();
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_ExtractSubvector, cast<VectorType>(Args[0]->getType()),
          None, CostKind, Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_insert: {
      // FIXME: Handle case where a scalable vector is inserted into a scalable
      // vector
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return InstructionCost::getInvalid();
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), None,
          CostKind, Index, cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::experimental_vector_reverse: {
      return thisT()->getShuffleCost(TTI::SK_Reverse,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     CostKind, 0, cast<VectorType>(RetTy));
    }
    case Intrinsic::experimental_vector_splice: {
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_Splice,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     CostKind, Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      const TTI::OperandValueInfo OpInfoX = TTI::getOperandInfo(X);
      const TTI::OperandValueInfo OpInfoY = TTI::getOperandInfo(Y);
      const TTI::OperandValueInfo OpInfoZ = TTI::getOperandInfo(Z);
      const TTI::OperandValueInfo OpInfoBW =
          {TTI::OK_UniformConstantValue,
           isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
                                                       : TTI::OP_None};
1575  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1576  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1577  InstructionCost Cost = 0;
1578  Cost +=
1579  thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
1580  Cost +=
1581  thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
1582  Cost += thisT()->getArithmeticInstrCost(
1583  BinaryOperator::Shl, RetTy, CostKind, OpInfoX,
1584  {OpInfoZ.Kind, TTI::OP_None});
1585  Cost += thisT()->getArithmeticInstrCost(
1586  BinaryOperator::LShr, RetTy, CostKind, OpInfoY,
1587  {OpInfoZ.Kind, TTI::OP_None});
1588  // Non-constant shift amounts requires a modulo.
1589  if (!OpInfoZ.isConstant())
1590  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1591  CostKind, OpInfoZ, OpInfoBW);
1592  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1593  if (X != Y) {
1594  Type *CondTy = RetTy->getWithNewBitWidth(1);
1595  Cost +=
1596  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1598  Cost +=
1599  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1601  }
1602  return Cost;
1603  }
    case Intrinsic::get_active_lane_mask: {
      EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);

      // If we're not expanding the intrinsic then we assume this is cheap
      // to implement.
      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
        return getTypeLegalizationCost(RetTy).first;
      }

      // Create the expanded types that will be used to calculate the uadd_sat
      // operation.
      Type *ExpRetTy = VectorType::get(
          ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
      IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
      InstructionCost Cost =
          thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
                                          CmpInst::ICMP_ULT, CostKind);
      return Cost;
    }
    }

    // Assume that we need to scalarize this intrinsic.
    // Compute the scalarization overhead based on Args for a vector
    // intrinsic.
    InstructionCost ScalarizationCost = InstructionCost::getInvalid();
    if (RetVF.isVector() && !RetVF.isScalable()) {
      ScalarizationCost = 0;
      if (!RetTy->isVoidTy())
        ScalarizationCost +=
            getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
      ScalarizationCost +=
          getOperandsScalarizationOverhead(Args, ICA.getArgTypes());
    }

    IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
                                  ScalarizationCost);
    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
  }

  /// Get intrinsic cost based on argument types.
  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
  /// cost of scalarizing the arguments and the return value will be computed
  /// based on types.
  InstructionCost
  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) {
    Intrinsic::ID IID = ICA.getID();
    Type *RetTy = ICA.getReturnType();
    const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
    FastMathFlags FMF = ICA.getFlags();
    InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost();
    bool SkipScalarizationCost = ICA.skipScalarizationCost();

    VectorType *VecOpTy = nullptr;
    if (!Tys.empty()) {
      // The vector reduction operand is operand 0 except for fadd/fmul.
      // Their operand 0 is a scalar start value, so the vector op is operand 1.
      unsigned VecTyIndex = 0;
      if (IID == Intrinsic::vector_reduce_fadd ||
          IID == Intrinsic::vector_reduce_fmul)
        VecTyIndex = 1;
      assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
      VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
    }

    // Library call cost - other than size, make it expensive.
    unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
    unsigned ISD = 0;
1674  switch (IID) {
1675  default: {
1676  // Scalable vectors cannot be scalarized, so return Invalid.
1677  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
1678  return isa<ScalableVectorType>(Ty);
1679  }))
1680  return InstructionCost::getInvalid();
1681 
1682  // Assume that we need to scalarize this intrinsic.
1683  InstructionCost ScalarizationCost =
1684  SkipScalarizationCost ? ScalarizationCostPassed : 0;
1685  unsigned ScalarCalls = 1;
1686  Type *ScalarRetTy = RetTy;
1687  if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1688  if (!SkipScalarizationCost)
1689  ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
1690  ScalarCalls = std::max(ScalarCalls,
1691  cast<FixedVectorType>(RetVTy)->getNumElements());
1692  ScalarRetTy = RetTy->getScalarType();
1693  }
1694  SmallVector<Type *, 4> ScalarTys;
1695  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1696  Type *Ty = Tys[i];
1697  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1698  if (!SkipScalarizationCost)
1699  ScalarizationCost += getScalarizationOverhead(VTy, false, true);
1700  ScalarCalls = std::max(ScalarCalls,
1701  cast<FixedVectorType>(VTy)->getNumElements());
1702  Ty = Ty->getScalarType();
1703  }
1704  ScalarTys.push_back(Ty);
1705  }
1706  if (ScalarCalls == 1)
1707  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1708 
1709  IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
1710  InstructionCost ScalarCost =
1711  thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
1712 
1713  return ScalarCalls * ScalarCost + ScalarizationCost;
1714  }
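// Illustrative arithmetic (editor's note, assuming a hypothetical
// <4 x float> intrinsic with one vector operand and no target support):
// ScalarCalls becomes 4, so the estimate above works out to
//   4 * ScalarCost + insert overhead for the result
//                  + extract overhead for the operand.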
1715  // Look for intrinsics that can be lowered directly or turned into a scalar
1716  // intrinsic call.
1717  case Intrinsic::sqrt:
1718  ISD = ISD::FSQRT;
1719  break;
1720  case Intrinsic::sin:
1721  ISD = ISD::FSIN;
1722  break;
1723  case Intrinsic::cos:
1724  ISD = ISD::FCOS;
1725  break;
1726  case Intrinsic::exp:
1727  ISD = ISD::FEXP;
1728  break;
1729  case Intrinsic::exp2:
1730  ISD = ISD::FEXP2;
1731  break;
1732  case Intrinsic::log:
1733  ISD = ISD::FLOG;
1734  break;
1735  case Intrinsic::log10:
1736  ISD = ISD::FLOG10;
1737  break;
1738  case Intrinsic::log2:
1739  ISD = ISD::FLOG2;
1740  break;
1741  case Intrinsic::fabs:
1742  ISD = ISD::FABS;
1743  break;
1744  case Intrinsic::canonicalize:
1745  ISD = ISD::FCANONICALIZE;
1746  break;
1747  case Intrinsic::minnum:
1748  ISD = ISD::FMINNUM;
1749  break;
1750  case Intrinsic::maxnum:
1751  ISD = ISD::FMAXNUM;
1752  break;
1753  case Intrinsic::minimum:
1754  ISD = ISD::FMINIMUM;
1755  break;
1756  case Intrinsic::maximum:
1757  ISD = ISD::FMAXIMUM;
1758  break;
1759  case Intrinsic::copysign:
1760  ISD = ISD::FCOPYSIGN;
1761  break;
1762  case Intrinsic::floor:
1763  ISD = ISD::FFLOOR;
1764  break;
1765  case Intrinsic::ceil:
1766  ISD = ISD::FCEIL;
1767  break;
1768  case Intrinsic::trunc:
1769  ISD = ISD::FTRUNC;
1770  break;
1771  case Intrinsic::nearbyint:
1772  ISD = ISD::FNEARBYINT;
1773  break;
1774  case Intrinsic::rint:
1775  ISD = ISD::FRINT;
1776  break;
1777  case Intrinsic::round:
1778  ISD = ISD::FROUND;
1779  break;
1780  case Intrinsic::roundeven:
1781  ISD = ISD::FROUNDEVEN;
1782  break;
1783  case Intrinsic::pow:
1784  ISD = ISD::FPOW;
1785  break;
1786  case Intrinsic::fma:
1787  ISD = ISD::FMA;
1788  break;
1789  case Intrinsic::fmuladd:
1790  ISD = ISD::FMA;
1791  break;
1792  case Intrinsic::experimental_constrained_fmuladd:
1793  ISD = ISD::STRICT_FMA;
1794  break;
1795  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1796  case Intrinsic::lifetime_start:
1797  case Intrinsic::lifetime_end:
1798  case Intrinsic::sideeffect:
1799  case Intrinsic::pseudoprobe:
1800  case Intrinsic::arithmetic_fence:
1801  return 0;
1802  case Intrinsic::masked_store: {
1803  Type *Ty = Tys[0];
1804  Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1805  return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
1806  CostKind);
1807  }
1808  case Intrinsic::masked_load: {
1809  Type *Ty = RetTy;
1810  Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1811  return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
1812  CostKind);
1813  }
1814  case Intrinsic::vector_reduce_add:
1815  return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
1816  None, CostKind);
1817  case Intrinsic::vector_reduce_mul:
1818  return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
1819  None, CostKind);
1820  case Intrinsic::vector_reduce_and:
1821  return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
1822  None, CostKind);
1823  case Intrinsic::vector_reduce_or:
1824  return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, None,
1825  CostKind);
1826  case Intrinsic::vector_reduce_xor:
1827  return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
1828  None, CostKind);
1829  case Intrinsic::vector_reduce_fadd:
1830  return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
1831  FMF, CostKind);
1832  case Intrinsic::vector_reduce_fmul:
1833  return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
1834  FMF, CostKind);
1835  case Intrinsic::vector_reduce_smax:
1836  case Intrinsic::vector_reduce_smin:
1837  case Intrinsic::vector_reduce_fmax:
1838  case Intrinsic::vector_reduce_fmin:
1839  return thisT()->getMinMaxReductionCost(
1840  VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1841  /*IsUnsigned=*/false, CostKind);
1842  case Intrinsic::vector_reduce_umax:
1843  case Intrinsic::vector_reduce_umin:
1844  return thisT()->getMinMaxReductionCost(
1845  VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1846  /*IsUnsigned=*/true, CostKind);
1847  case Intrinsic::abs: {
1848  // abs(X) = select(icmp(X,0),X,sub(0,X))
1849  Type *CondTy = RetTy->getWithNewBitWidth(1);
1850  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1851  InstructionCost Cost = 0;
1852  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1853  Pred, CostKind);
1854  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1855  Pred, CostKind);
1856  // TODO: Should we add an OperandValueProperties::OP_Zero property?
1857  Cost += thisT()->getArithmeticInstrCost(
1858  BinaryOperator::Sub, RetTy, CostKind, {TTI::OK_UniformConstantValue, TTI::OP_None});
1859  return Cost;
1860  }
1861  case Intrinsic::smax:
1862  case Intrinsic::smin:
1863  case Intrinsic::umax:
1864  case Intrinsic::umin: {
1865  // minmax(X,Y) = select(icmp(X,Y),X,Y)
1866  Type *CondTy = RetTy->getWithNewBitWidth(1);
1867  bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
1868  CmpInst::Predicate Pred =
1869  IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
1870  InstructionCost Cost = 0;
1871  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1872  Pred, CostKind);
1873  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1874  Pred, CostKind);
1875  return Cost;
1876  }
1877  case Intrinsic::sadd_sat:
1878  case Intrinsic::ssub_sat: {
1879  Type *CondTy = RetTy->getWithNewBitWidth(1);
1880 
1881  Type *OpTy = StructType::create({RetTy, CondTy});
1882  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1883  ? Intrinsic::sadd_with_overflow
1884  : Intrinsic::ssub_with_overflow;
1885  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1886 
1887  // SatMax -> Overflow && SumDiff < 0
1888  // SatMin -> Overflow && SumDiff >= 0
1889  InstructionCost Cost = 0;
1890  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1891  nullptr, ScalarizationCostPassed);
1892  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1893  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1894  Pred, CostKind);
1895  Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1896  CondTy, Pred, CostKind);
1897  return Cost;
1898  }
1899  case Intrinsic::uadd_sat:
1900  case Intrinsic::usub_sat: {
1901  Type *CondTy = RetTy->getWithNewBitWidth(1);
1902 
1903  Type *OpTy = StructType::create({RetTy, CondTy});
1904  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1905  ? Intrinsic::uadd_with_overflow
1906  : Intrinsic::usub_with_overflow;
1907 
1908  InstructionCost Cost = 0;
1909  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1910  nullptr, ScalarizationCostPassed);
1911  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1912  Cost +=
1913  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1914  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1915  return Cost;
1916  }
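// Illustrative note (editor's sketch, not part of the original source):
// the two cost terms above model the usual saturating-add lowering
//   {sum, ov} = uadd.with.overflow(X, Y)
//   res       = select(ov, UINT_MAX, sum)   // usub_sat selects 0 instead
// i.e. one overflow intrinsic plus one select.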
1917  case Intrinsic::smul_fix:
1918  case Intrinsic::umul_fix: {
1919  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1920  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1921 
1922  unsigned ExtOp =
1923  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1924  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1925 
1926  InstructionCost Cost = 0;
1927  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
1928  Cost +=
1929  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1930  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1931  CCH, CostKind);
1932  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1933  CostKind,
1934  {TTI::OK_AnyValue, TTI::OP_None},
1935  {TTI::OK_UniformConstantValue, TTI::OP_None});
1936  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1937  {TTI::OK_AnyValue, TTI::OP_None},
1938  {TTI::OK_UniformConstantValue, TTI::OP_None});
1939  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
1940  return Cost;
1941  }
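// Illustrative note (editor's sketch, not part of the original source):
// fixed-point multiply with scale S is priced as a widening multiply whose
// 2*BW-bit product is split back into two BW-bit halves and recombined:
//   ext x2 -> mul -> trunc x2, then or(lshr(lo, S), shl(hi, BW - S))
// which accounts for the two casts, the mul, the two truncs and the
// lshr/shl/or terms summed above.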
1942  case Intrinsic::sadd_with_overflow:
1943  case Intrinsic::ssub_with_overflow: {
1944  Type *SumTy = RetTy->getContainedType(0);
1945  Type *OverflowTy = RetTy->getContainedType(1);
1946  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1947  ? BinaryOperator::Add
1948  : BinaryOperator::Sub;
1949 
1950  // Add:
1951  // Overflow -> (Result < LHS) ^ (RHS < 0)
1952  // Sub:
1953  // Overflow -> (Result < LHS) ^ (RHS > 0)
1954  InstructionCost Cost = 0;
1955  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1956  Cost += 2 * thisT()->getCmpSelInstrCost(
1957  Instruction::ICmp, SumTy, OverflowTy,
1958  CmpInst::ICMP_SGT, CostKind);
1959  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
1960  CostKind);
1961  return Cost;
1962  }
1963  case Intrinsic::uadd_with_overflow:
1964  case Intrinsic::usub_with_overflow: {
1965  Type *SumTy = RetTy->getContainedType(0);
1966  Type *OverflowTy = RetTy->getContainedType(1);
1967  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1968  ? BinaryOperator::Add
1969  : BinaryOperator::Sub;
1970  CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
1971  ? CmpInst::ICMP_ULT
1972  : CmpInst::ICMP_UGT;
1973 
1974  InstructionCost Cost = 0;
1975  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1976  Cost +=
1977  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
1978  Pred, CostKind);
1979  return Cost;
1980  }
1981  case Intrinsic::smul_with_overflow:
1982  case Intrinsic::umul_with_overflow: {
1983  Type *MulTy = RetTy->getContainedType(0);
1984  Type *OverflowTy = RetTy->getContainedType(1);
1985  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1986  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1987  bool IsSigned = IID == Intrinsic::smul_with_overflow;
1988 
1989  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
1990  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1991 
1992  InstructionCost Cost = 0;
1993  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
1994  Cost +=
1995  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1996  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
1997  CCH, CostKind);
1998  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
1999  CostKind,
2000  {TTI::OK_AnyValue, TTI::OP_None},
2001  {TTI::OK_UniformConstantValue, TTI::OP_None});
2002 
2003  if (IsSigned)
2004  Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
2005  CostKind,
2006  {TTI::OK_AnyValue, TTI::OP_None},
2007  {TTI::OK_UniformConstantValue, TTI::OP_None});
2008 
2009  Cost += thisT()->getCmpSelInstrCost(
2010  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
2011  return Cost;
2012  }
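// Illustrative note (editor's sketch, not part of the original source):
// overflow of the widened multiply is detected by testing the discarded
// upper half of the 2*BW-bit product: for the unsigned case it must be
// zero, and for the signed case it must equal the sign-splat of the low
// half (hence the extra AShr), followed by the final ICMP_NE.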
2013  case Intrinsic::fptosi_sat:
2014  case Intrinsic::fptoui_sat: {
2015  if (Tys.empty())
2016  break;
2017  Type *FromTy = Tys[0];
2018  bool IsSigned = IID == Intrinsic::fptosi_sat;
2019 
2020  InstructionCost Cost = 0;
2021  IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
2022  {FromTy, FromTy});
2023  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
2024  IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
2025  {FromTy, FromTy});
2026  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
2027  Cost += thisT()->getCastInstrCost(
2028  IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
2029  TTI::CastContextHint::None, CostKind);
2030  if (IsSigned) {
2031  Type *CondTy = RetTy->getWithNewBitWidth(1);
2032  Cost += thisT()->getCmpSelInstrCost(
2033  BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2034  Cost += thisT()->getCmpSelInstrCost(
2035  BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);
2036  }
2037  return Cost;
2038  }
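// Illustrative note (editor's sketch, not part of the original source):
// the saturating conversion is priced as clamp-then-convert: clamp the
// input into the representable range with minnum/maxnum (Attrs1/Attrs2),
// convert with fptosi/fptoui, and for the signed case add an fcmp uno
// plus a select so that a NaN input yields zero.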
2039  case Intrinsic::ctpop:
2040  ISD = ISD::CTPOP;
2041  // In case of legalization use TCC_Expensive. This is cheaper than a
2042  // library call but still not a cheap instruction.
2043  SingleCallCost = TargetTransformInfo::TCC_Expensive;
2044  break;
2045  case Intrinsic::ctlz:
2046  ISD = ISD::CTLZ;
2047  break;
2048  case Intrinsic::cttz:
2049  ISD = ISD::CTTZ;
2050  break;
2051  case Intrinsic::bswap:
2052  ISD = ISD::BSWAP;
2053  break;
2054  case Intrinsic::bitreverse:
2055  ISD = ISD::BITREVERSE;
2056  break;
2057  }
2058 
2059  const TargetLoweringBase *TLI = getTLI();
2060  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(RetTy);
2061 
2062  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
2063  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
2064  TLI->isFAbsFree(LT.second)) {
2065  return 0;
2066  }
2067 
2068  // The operation is legal. Assume it costs 1.
2069  // If the type is split to multiple registers, assume that there is some
2070  // overhead to this.
2071  // TODO: Once we have extract/insert subvector cost we need to use them.
2072  if (LT.first > 1)
2073  return (LT.first * 2);
2074  else
2075  return (LT.first * 1);
2076  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
2077  // If the operation is custom lowered then assume
2078  // that the code is twice as expensive.
2079  return (LT.first * 2);
2080  }
2081 
2082  // If we can't lower fmuladd into an FMA estimate the cost as a floating
2083  // point mul followed by an add.
2084  if (IID == Intrinsic::fmuladd)
2085  return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
2086  CostKind) +
2087  thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
2088  CostKind);
2089  if (IID == Intrinsic::experimental_constrained_fmuladd) {
2090  IntrinsicCostAttributes FMulAttrs(
2091  Intrinsic::experimental_constrained_fmul, RetTy, Tys);
2092  IntrinsicCostAttributes FAddAttrs(
2093  Intrinsic::experimental_constrained_fadd, RetTy, Tys);
2094  return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
2095  thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
2096  }
2097 
2098  // Else, assume that we need to scalarize this intrinsic. For math builtins
2099  // this will emit a costly libcall, adding call overhead and spills. Make it
2100  // very expensive.
2101  if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2102  // Scalable vectors cannot be scalarized, so return Invalid.
2103  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2104  return isa<ScalableVectorType>(Ty);
2105  }))
2106  return InstructionCost::getInvalid();
2107 
2108  InstructionCost ScalarizationCost =
2109  SkipScalarizationCost ? ScalarizationCostPassed
2110  : getScalarizationOverhead(RetVTy, true, false);
2111 
2112  unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2113  SmallVector<Type *, 4> ScalarTys;
2114  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
2115  Type *Ty = Tys[i];
2116  if (Ty->isVectorTy())
2117  Ty = Ty->getScalarType();
2118  ScalarTys.push_back(Ty);
2119  }
2120  IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2121  InstructionCost ScalarCost =
2122  thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2123  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
2124  if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
2125  if (!ICA.skipScalarizationCost())
2126  ScalarizationCost += getScalarizationOverhead(VTy, false, true);
2127  ScalarCalls = std::max(ScalarCalls,
2128  cast<FixedVectorType>(VTy)->getNumElements());
2129  }
2130  }
2131  return ScalarCalls * ScalarCost + ScalarizationCost;
2132  }
2133 
2134  // This is going to be turned into a library call, make it expensive.
2135  return SingleCallCost;
2136  }
2137 
2138  /// Compute a cost of the given call instruction.
2139  ///
2140  /// Compute the cost of calling function F with return type RetTy and
2141  /// argument types Tys. F might be nullptr, in this case the cost of an
2142  /// arbitrary call with the specified signature will be returned.
2143  /// This is used, for instance, when we estimate call of a vector
2144  /// counterpart of the given function.
2145  /// \param F Called function, might be nullptr.
2146  /// \param RetTy Return value types.
2147  /// \param Tys Argument types.
2148  /// \returns The cost of Call instruction.
2149  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2150  ArrayRef<Type *> Tys,
2151  TTI::TargetCostKind CostKind) {
2152  return 10;
2153  }
2154 
2155  unsigned getNumberOfParts(Type *Tp) {
2156  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Tp);
2157  return LT.first.isValid() ? *LT.first.getValue() : 0;
2158  }
2159 
2161  const SCEV *) {
2162  return 0;
2163  }
2164 
2165  /// Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
2166  /// We're assuming that reduction operations are performed the following way:
2167  ///
2168  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
2169  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
2170  /// \----------------v-------------/ \----------v------------/
2171  /// n/2 elements n/2 elements
2172  /// %red1 = op <n x t> %val, <n x t> %val1
2173  /// After this operation we have a vector %red1 where only the first n/2
2174  /// elements are meaningful, the second n/2 elements are undefined and can be
2175  /// dropped. All other operations are actually working with the vector of
2176  /// length n/2, not n, though the real vector length is still n.
2177  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
2178  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
2179  /// \----------------v-------------/ \----------v------------/
2180  /// n/4 elements 3*n/4 elements
2181  /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of
2182  /// length n/2, the resulting vector has length n/4 etc.
2183  ///
2184  /// The cost model should take into account that the actual length of the
2185  /// vector is reduced on each iteration.
2186  InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty,
2187  TTI::TargetCostKind CostKind) {
2188  // Targets must implement a default value for the scalable case, since
2189  // we don't know how many lanes the vector has.
2190  if (isa<ScalableVectorType>(Ty))
2191  return InstructionCost::getInvalid();
2192 
2193  Type *ScalarTy = Ty->getElementType();
2194  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2195  if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
2196  ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
2197  NumVecElts >= 2) {
2198  // Or reduction for i1 is represented as:
2199  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2200  // %res = cmp ne iReduxWidth %val, 0
2201  // And reduction for i1 is represented as:
2202  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2203  // %res = cmp eq iReduxWidth %val, 11111
2204  Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
2205  return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
2206  TTI::CastContextHint::None, CostKind) +
2207  thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
2208  CmpInst::makeCmpResultType(ValTy),
2209  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2210  }
2211  unsigned NumReduxLevels = Log2_32(NumVecElts);
2212  InstructionCost ArithCost = 0;
2213  InstructionCost ShuffleCost = 0;
2214  std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
2215  unsigned LongVectorCount = 0;
2216  unsigned MVTLen =
2217  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2218  while (NumVecElts > MVTLen) {
2219  NumVecElts /= 2;
2220  VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2221  ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
2222  CostKind, NumVecElts, SubTy);
2223  ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
2224  Ty = SubTy;
2225  ++LongVectorCount;
2226  }
2227 
2228  NumReduxLevels -= LongVectorCount;
2229 
2230  // The minimal length of the vector is limited by the real length of vector
2231  // operations performed on the current platform. That's why several final
2232  // reduction operations are performed on the vectors with the same
2233  // architecture-dependent length.
2234 
2235  // By default reductions need one shuffle per reduction level.
2236  ShuffleCost +=
2237  NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2238  None, CostKind, 0, Ty);
2239  ArithCost +=
2240  NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
2241  return ShuffleCost + ArithCost +
2242  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2243  }
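// Worked example (editor's note, assuming an add reduction of <16 x i32>
// on a target whose widest legal vector holds 4 x i32): the loop above
// splits 16 -> 8 -> 4, charging two extract-subvector shuffles and two
// adds; the remaining log2(16) - 2 = 2 levels charge two more shuffle+add
// pairs on the 4-wide type, plus one final extractelement of element 0.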
2244 
2245  /// Try to calculate the cost of performing strict (in-order) reductions,
2246  /// which involves doing a sequence of floating point additions in lane
2247  /// order, starting with an initial value. For example, consider a scalar
2248  /// initial value 'InitVal' of type float and a vector of type <4 x float>:
2249  ///
2250  /// Vector = <float %v0, float %v1, float %v2, float %v3>
2251  ///
2252  /// %add1 = %InitVal + %v0
2253  /// %add2 = %add1 + %v1
2254  /// %add3 = %add2 + %v2
2255  /// %add4 = %add3 + %v3
2256  ///
2257  /// As a simple estimate we can say the cost of such a reduction is 4 times
2258  /// the cost of a scalar FP addition. We can only estimate the costs for
2259  /// fixed-width vectors here because for scalable vectors we do not know the
2260  /// runtime number of operations.
2261  InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty,
2262  TTI::TargetCostKind CostKind) {
2263  // Targets must implement a default value for the scalable case, since
2264  // we don't know how many lanes the vector has.
2265  if (isa<ScalableVectorType>(Ty))
2266  return InstructionCost::getInvalid();
2267 
2268  auto *VTy = cast<FixedVectorType>(Ty);
2269  InstructionCost ExtractCost =
2270  getScalarizationOverhead(VTy, /*Insert=*/false, /*Extract=*/true);
2271  InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
2272  Opcode, VTy->getElementType(), CostKind);
2273  ArithCost *= VTy->getNumElements();
2274 
2275  return ExtractCost + ArithCost;
2276  }
2277 
2278  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2279  Optional<FastMathFlags> FMF,
2280  TTI::TargetCostKind CostKind) {
2281  if (TTI::requiresOrderedReduction(FMF))
2282  return getOrderedReductionCost(Opcode, Ty, CostKind);
2283  return getTreeReductionCost(Opcode, Ty, CostKind);
2284  }
2285 
2286  /// Try to calculate op costs for min/max reduction operations.
2287  /// \param CondTy Conditional type for the Select instruction.
2288  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
2289  bool IsUnsigned,
2290  TTI::TargetCostKind CostKind) {
2291  // Targets must implement a default value for the scalable case, since
2292  // we don't know how many lanes the vector has.
2293  if (isa<ScalableVectorType>(Ty))
2294  return InstructionCost::getInvalid();
2295 
2296  Type *ScalarTy = Ty->getElementType();
2297  Type *ScalarCondTy = CondTy->getElementType();
2298  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2299  unsigned NumReduxLevels = Log2_32(NumVecElts);
2300  unsigned CmpOpcode;
2301  if (Ty->isFPOrFPVectorTy()) {
2302  CmpOpcode = Instruction::FCmp;
2303  } else {
2304  assert(Ty->isIntOrIntVectorTy() &&
2305  "expecting floating point or integer type for min/max reduction");
2306  CmpOpcode = Instruction::ICmp;
2307  }
2308  InstructionCost MinMaxCost = 0;
2309  InstructionCost ShuffleCost = 0;
2310  std::pair<InstructionCost, MVT> LT = thisT()->getTypeLegalizationCost(Ty);
2311  unsigned LongVectorCount = 0;
2312  unsigned MVTLen =
2313  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2314  while (NumVecElts > MVTLen) {
2315  NumVecElts /= 2;
2316  auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2317  CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
2318 
2319  ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
2320  None, CostKind, NumVecElts, SubTy);
2321  MinMaxCost +=
2322  thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
2323  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2324  thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
2325  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2326  Ty = SubTy;
2327  ++LongVectorCount;
2328  }
2329 
2330  NumReduxLevels -= LongVectorCount;
2331 
2332  // The minimal length of the vector is limited by the real length of vector
2333  // operations performed on the current platform. That's why several final
2334  // reduction operations are performed on the vectors with the same
2335  // architecture-dependent length.
2336  ShuffleCost +=
2337  NumReduxLevels * thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
2338  None, CostKind, 0, Ty);
2339  MinMaxCost +=
2340  NumReduxLevels *
2341  (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
2342  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2343  thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
2344  CmpInst::BAD_ICMP_PREDICATE, CostKind));
2345  // The last min/max should be in vector registers and we counted it above.
2346  // So just need a single extractelement.
2347  return ShuffleCost + MinMaxCost +
2348  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2349  }
2350 
2351  InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
2352  Type *ResTy, VectorType *Ty,
2353  Optional<FastMathFlags> FMF,
2354  TTI::TargetCostKind CostKind) {
2355  // Without any native support, this is equivalent to the cost of
2356  // vecreduce.opcode(ext(Ty A)).
2357  VectorType *ExtTy = VectorType::get(ResTy, Ty);
2358  InstructionCost RedCost =
2359  thisT()->getArithmeticReductionCost(Opcode, ExtTy, FMF, CostKind);
2360  InstructionCost ExtCost = thisT()->getCastInstrCost(
2361  IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2362  TTI::CastContextHint::None, CostKind);
2363 
2364  return RedCost + ExtCost;
2365  }
2366 
2367  InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
2368  VectorType *Ty,
2369  TTI::TargetCostKind CostKind) {
2370  // Without any native support, this is equivalent to the cost of
2371  // vecreduce.add(mul(ext(Ty A), ext(Ty B))) or
2372  // vecreduce.add(mul(A, B)).
2373  VectorType *ExtTy = VectorType::get(ResTy, Ty);
2374  InstructionCost RedCost = thisT()->getArithmeticReductionCost(
2375  Instruction::Add, ExtTy, None, CostKind);
2376  InstructionCost ExtCost = thisT()->getCastInstrCost(
2377  IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2378  TTI::CastContextHint::None, CostKind);
2379 
2380  InstructionCost MulCost =
2381  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2382 
2383  return RedCost + MulCost + 2 * ExtCost;
2384  }
2385 
2386  InstructionCost getVectorSplitCost() { return 1; }
2387 
2388  /// @}
2389 };
2390 
2391 /// Concrete BasicTTIImpl that can be used if no further customization
2392 /// is needed.
2393 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
2394  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
2395 
2396  friend class BasicTTIImplBase<BasicTTIImpl>;
2397 
2398  const TargetSubtargetInfo *ST;
2399  const TargetLoweringBase *TLI;
2400 
2401  const TargetSubtargetInfo *getST() const { return ST; }
2402  const TargetLoweringBase *getTLI() const { return TLI; }
2403 
2404 public:
2405  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
2406 };
2407 
2408 } // end namespace llvm
2409 
2410 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
llvm::ShuffleVectorInst::isZeroEltSplatMask
static bool isZeroEltSplatMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses all elements with the same value as the first element of exa...
Definition: Instructions.cpp:2240
llvm::MCSubtargetInfo::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: MCSubtargetInfo.cpp:359
llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition: ISDOpcodes.h:929
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::BasicTTIImplBase::getVectorSplitCost
InstructionCost getVectorSplitCost()
Definition: BasicTTIImpl.h:2386
llvm::TargetLoweringBase::isTruncStoreLegal
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
Definition: TargetLowering.h:1330
llvm::BasicTTIImplBase::getFPOpCost
InstructionCost getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:503
ValueTypes.h
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:464
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:443
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:887
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:471
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:217
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1110
llvm::BasicTTIImplBase::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:741
llvm::TargetTransformInfoImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Definition: TargetTransformInfoImpl.h:157
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:245
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1373
llvm::BasicTTIImplBase::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:467
MathExtras.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:439
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition: TargetLowering.h:190
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:236
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::DataLayout::getTypeStoreSizeInBits
TypeSize getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
Definition: DataLayout.h:486
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::BasicTTIImplBase::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:648
llvm::TargetLoweringBase::isCheapToSpeculateCtlz
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition: TargetLowering.h:629
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:195
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::BasicTTIImplBase::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:513
llvm::BasicTTIImplBase::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:376
llvm::BasicTTIImplBase::isAlwaysUniform
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:270
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1400
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:991
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:214
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:404
llvm::BasicTTIImplBase::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:304
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:943
llvm::BasicTTIImplBase::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: BasicTTIImpl.h:282
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:546
llvm::ISD::PRE_DEC
@ PRE_DEC
Definition: ISDOpcodes.h:1373
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
llvm::BasicTTIImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1126
llvm::TargetLoweringBase::TypeScalarizeScalableVector
@ TypeScalarizeScalableVector
Definition: TargetLowering.h:215
llvm::BasicTTIImplBase::isTypeLegal
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:386
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:727
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:314
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:28
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:532
llvm::BasicTTIImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
Definition: BasicTTIImpl.h:617
ErrorHandling.h
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1044
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:627
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:152
llvm::BasicTTIImplBase::BasicTTIImplBase
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:247
llvm::BasicTTIImplBase::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:256
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::LoopVectorizationLegality
LoopVectorizationLegality checks if it is legal to vectorize a loop, and to what vectorization factor...
Definition: LoopVectorizationLegality.h:241
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:151
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:920
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:184
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:220
llvm::TargetLoweringBase::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetLowering.h:2577
APInt.h
llvm::BasicTTIImplBase::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:499
llvm::BasicTTIImplBase::~BasicTTIImplBase
virtual ~BasicTTIImplBase()=default
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:746
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:467
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetLoweringBase::getTruncStoreAction
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
Definition: TargetLowering.h:1319
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1411
llvm::TargetLoweringBase::isIndexedLoadLegal
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
Definition: TargetLowering.h:1358
llvm::BasicTTIImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:351
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:526
llvm::TargetLoweringBase::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I) const
Definition: TargetLowering.h:2714
llvm::BasicTTIImplBase::improveShuffleKindFromMask
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask) const
Definition: BasicTTIImpl.h:897
llvm::TargetLoweringBase::isSuitableForJumpTable
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
Definition: TargetLoweringBase.cpp:1629
llvm::Optional
Definition: APInt.h:33
llvm::BasicTTIImplBase::isIndexedStoreLegal
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:345
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::BasicTTIImplBase::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Definition: BasicTTIImpl.h:666
llvm::SmallPtrSet< const BasicBlock *, 4 >
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:914
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1404
llvm::TargetTransformInfoImplCRTPBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:946
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:513
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::ShuffleVectorInst::isReverseMask
static bool isReverseMask(ArrayRef< int > Mask)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition: Instructions.cpp:2222
llvm::BasicTTIImplBase::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: BasicTTIImpl.h:691
llvm::TargetLoweringBase::LegalizeAction
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
Definition: TargetLowering.h:194
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
llvm::TargetLoweringBase::TypeExpandInteger
@ TypeExpandInteger
Definition: TargetLowering.h:207
llvm::TargetTransformInfo::OperandValueInfo
Definition: TargetTransformInfo.h:921
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:895
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
llvm::BasicTTIImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: BasicTTIImpl.h:355
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2137
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:1018
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:912
llvm::BasicTTIImplBase::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:268
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:154
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:885
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::LinearPolySize< TypeSize >::isKnownLT
static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:328
TargetTransformInfoImpl.h
llvm::BasicTTIImplBase::getTreeReductionCost
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
Definition: BasicTTIImpl.h:2186
llvm::Triple::isArch64Bit
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1451
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:162
llvm::BasicTTIImplBase::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Definition: BasicTTIImpl.h:295
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1264
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:153
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
MachineValueType.h
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:987
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::BasicTTIImplBase::getStoreMinimumVF
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
Definition: BasicTTIImpl.h:319
llvm::ElementCount::isScalar
bool isScalar() const
Counting predicates.
Definition: TypeSize.h:414
llvm::TargetTransformInfoImplBase::getDataLayout
const DataLayout & getDataLayout() const
Definition: TargetTransformInfoImpl.h:46
llvm::BasicTTIImplBase::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Definition: BasicTTIImpl.h:662
llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:422
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:930
llvm::TargetLoweringBase::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetLowering.h:2584
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
llvm::TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Definition: TargetTransformInfoImpl.h:221
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
llvm::TargetTransformInfoImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI) const
Definition: TargetTransformInfoImpl.h:163
TargetLowering.h
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:156
llvm::MCSubtargetInfo::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Return the maximum prefetch distance in terms of loop iterations.
Definition: MCSubtargetInfo.cpp:355
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:893
llvm::BasicTTIImplBase::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: BasicTTIImpl.h:783
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:539
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1177
llvm::MCSubtargetInfo::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Return the preferred prefetch distance in terms of instructions.
Definition: MCSubtargetInfo.cpp:351
llvm::BasicTTIImplBase::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:518
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1280
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::TargetTransformInfoImplBase
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:33
llvm::BasicTTIImplBase::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB)
Definition: BasicTTIImpl.h:514
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
llvm::TargetTransformInfoImplBase::getCacheAssociativity
llvm::Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: TargetTransformInfoImpl.h:466
Constants.h
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1181
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:913
InlinePriorityMode::Cost
@ Cost
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:884
llvm::BasicTTIImplBase::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis()
Definition: BasicTTIImpl.h:266
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::Triple::isOSDarwin
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, or DriverKit).
Definition: Triple.h:509
llvm::BasicTTIImplBase::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: BasicTTIImpl.h:678
llvm::TargetLoweringBase::LegalizeKind
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Definition: TargetLowering.h:226
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1136
llvm::BasicTTIImplBase::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
Definition: BasicTTIImpl.h:2160
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
llvm::TargetLoweringBase::getTypeConversion
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const
Return pair that represents the legalization kind (first) that needs to happen to EVT (second) in ord...
Definition: TargetLoweringBase.cpp:956
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2555
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::BitVector::count
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:155
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::BasicTTIImplBase::getFlatAddressSpace
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:272
InstrTypes.h
llvm::BasicTTIImplBase::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1191
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::BasicTTIImplBase::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2278
llvm::BasicTTIImplBase::getMulAccReductionCost
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2367
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:928
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1186
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:194
llvm::BasicTTIImplBase::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:291
llvm::BasicTTIImplBase::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:1117
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:232
llvm::BasicTTIImplBase::getCacheSize
virtual Optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:642
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:547
llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:819
llvm::BasicTTIImplBase::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:817
llvm::BasicTTIImplBase::getRegUsageForType
unsigned getRegUsageForType(Type *Ty)
Definition: BasicTTIImpl.h:391
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:729
llvm::Instruction
Definition: Instruction.h:42
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1403
llvm::InterleavedAccessInfo
Drive the analysis of interleaved memory accesses in the loop.
Definition: VectorUtils.h:751
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::TargetLoweringBase::isLegalAddressingMode
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetLoweringBase.cpp:1906
llvm::TargetMachine::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: TargetMachine.h:323
llvm::BasicTTIImplBase::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent()
Definition: BasicTTIImpl.h:516
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:927
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:926
llvm::TargetTransformInfoImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
Definition: TargetTransformInfoImpl.h:569
llvm::BasicTTIImplBase::getCacheLineSize
virtual unsigned getCacheLineSize() const
Definition: BasicTTIImpl.h:658
BitVector.h
llvm::TargetTransformInfoImplCRTPBase
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:936
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:75
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1373
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::TargetTransformInfo::OperandValueInfo::Kind
OperandValueKind Kind
Definition: TargetTransformInfo.h:922
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:346
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1131
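For illustration, a hedged sketch of the same query via the public TargetTransformInfo (helper name fcmpOltCost assumed):
// Sketch: cost of an ordered less-than compare on <4 x float>.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;
InstructionCost fcmpOltCost(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  auto *ValTy = FixedVectorType::get(Type::getFloatTy(Ctx), 4);
  auto *CondTy = FixedVectorType::get(Type::getInt1Ty(Ctx), 4);
  return TTI.getCmpSelInstrCost(Instruction::FCmp, ValTy, CondTy,
                                CmpInst::FCMP_OLT,
                                TargetTransformInfo::TCK_RecipThroughput);
}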
llvm::PartialUnrollingThreshold
cl::opt< unsigned > PartialUnrollingThreshold
llvm::BasicTTIImplBase::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Definition: BasicTTIImpl.h:402
llvm::BasicTTIImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:963
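A short sketch of a cast-cost query (helper name sextCostV4 assumed; CastContextHint::None says the extend is not fused with a load or store):
// Sketch: cost of sign-extending <4 x i32> to <4 x i64>.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
InstructionCost sextCostV4(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  Type *Src = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  Type *Dst = FixedVectorType::get(Type::getInt64Ty(Ctx), 4);
  return TTI.getCastInstrCost(Instruction::SExt, Dst, Src,
                              TargetTransformInfo::CastContextHint::None,
                              TargetTransformInfo::TCK_RecipThroughput);
}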
llvm::None
const NoneType None
Definition: None.h:24
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::TargetLoweringBase::getTypeToTransformTo
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:999
Type.h
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:119
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::BasicTTIImplBase::shouldBuildLookupTables
bool shouldBuildLookupTables()
Definition: BasicTTIImpl.h:461
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1313
LoopInfo.h
llvm::TargetTransformInfoImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfoImpl.h:223
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
Operands
Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1373
llvm::TargetTransformInfoImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: TargetTransformInfoImpl.h:187
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:918
llvm::BasicTTIImplBase::emitGetActiveLaneMask
PredicationStyle emitGetActiveLaneMask()
Definition: BasicTTIImpl.h:613
llvm::TargetLoweringBase::isBeneficialToExpandPowI
bool isBeneficialToExpandPowI(int Exponent, bool OptForSize) const
Return true if it is beneficial to expand an @llvm.powi.
Definition: TargetLowering.h:2239
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:886
llvm::TargetLoweringBase::isCheapToSpeculateCttz
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition: TargetLowering.h:624
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:956
BasicBlock.h
llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1080
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::BasicTTIImplBase::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:308
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::divideCeil
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:683
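Together with Log2_32 above, this is the usual arithmetic for splitting wide values into register-sized parts; a tiny sketch (the helper name is made up):
// Sketch: how many 32-bit registers does a 100-bit value need?
#include "llvm/Support/MathExtras.h"
unsigned partsFor100Bits() {
  unsigned Shift = llvm::Log2_32(32); // floor(log2(32)) == 5
  (void)Shift;                        // silence unused-variable warnings
  return llvm::divideCeil(100, 32);   // ceil(100/32) == 4
}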
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:891
llvm::TargetMachine::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: TargetMachine.h:310
llvm::BasicTTIImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
Definition: BasicTTIImpl.h:622
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:921
llvm::BasicTTIImplBase::haveFastSqrt
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:492
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1404
llvm::BasicTTIImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: BasicTTIImpl.h:631
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
uint64_t
llvm::Type::getWithNewBitWidth
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
Definition: DerivedTypes.h:722
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:400
llvm::APIntOps::ScaleBitMask
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, bool MatchAllBits=false)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by A to NewBitWidth.
Definition: APInt.cpp:2971
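A small sketch of the widening direction; the values in the comments are worked by hand and worth double-checking against the implementation:
// Sketch: widen a 4-lane demanded-elements mask to 8 lanes.
#include "llvm/ADT/APInt.h"
using namespace llvm;
APInt widenDemandedMask() {
  APInt Narrow(4, 0b1010);                  // lanes 1 and 3 demanded
  return APIntOps::ScaleBitMask(Narrow, 8); // each bit splatted: 0b11001100
}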
llvm::TargetLoweringBase::isLoadExtLegal
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Definition: TargetLowering.h:1305
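A one-line legality probe, assuming a TargetLoweringBase reference is already in hand (helper name assumed):
// Sketch: can an i8 in memory be sign-extended to i32 by the load itself?
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;
bool hasSextLoadI8ToI32(const TargetLoweringBase &TLI) {
  return TLI.isLoadExtLegal(ISD::SEXTLOAD, MVT::i32, MVT::i8);
}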
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:936
llvm::TargetLoweringBase::getLoadExtAction
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Definition: TargetLowering.h:1293
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::BasicTTIImplBase::getVScaleForTuning
Optional< unsigned > getVScaleForTuning() const
Definition: BasicTTIImpl.h:696
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:907
llvm::TargetLoweringBase::isSuitableForBitTests
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
Definition: TargetLowering.h:1255
llvm::BasicTTIImplBase::isIndexedLoadLegal
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:339
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::TargetLoweringBase::isOperationLegalOrPromote
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
Definition: TargetLowering.h:1191
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::TargetTransformInfoImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Definition: TargetTransformInfoImpl.h:181
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:414
llvm::EVT::getEVT
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:577
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:154
llvm::TargetTransformInfoImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:525
llvm::BasicTTIImplBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:396
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::BasicTTIImplBase
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:77
llvm::BasicTTIImplBase::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=None)
Definition: BasicTTIImpl.h:933
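A sketch of a kind-based shuffle query through the public wrapper, leaving Mask, Index, and SubTp at their defaults (helper name assumed):
// Sketch: cost of reversing an <8 x float> vector.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
InstructionCost reverseCostV8F32(const TargetTransformInfo &TTI,
                                 LLVMContext &Ctx) {
  auto *VTy = FixedVectorType::get(Type::getFloatTy(Ctx), 8);
  return TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VTy);
}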
ArrayRef.h
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:534
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1404
maximum
Definition: README.txt:489
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
llvm::TargetLoweringBase::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetLowering.h:1722
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::BasicTTIImplBase::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:695
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:121
Ptr
@ Ptr
Definition: TargetLibraryInfo.cpp:60
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:752
function
Print MemDeps of function
Definition: MemDepPrinter.cpp:82
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::BasicTTIImplBase::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1285
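A sketch of a factor-2 query, of the kind the loop vectorizer might issue for a pair of stride-2 loads (helper name and concrete numbers are assumptions):
// Sketch: one wide <8 x i32> load deinterleaved into two stride-2
// sequences; Indices lists the interleave members actually used.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
InstructionCost deinterleaveCost(const TargetTransformInfo &TTI,
                                 LLVMContext &Ctx) {
  auto *WideTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 8);
  unsigned Indices[] = {0, 1};
  return TTI.getInterleavedMemoryOpCost(
      Instruction::Load, WideTy, /*Factor=*/2, Indices, Align(16),
      /*AddressSpace=*/0, TargetTransformInfo::TCK_RecipThroughput);
}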
llvm::BasicTTIImplBase::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo={TTI::OK_AnyValue, TTI::OP_None}, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1225
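A plain load-cost sketch (helper name assumed; address space 0 and 16-byte alignment picked arbitrarily):
// Sketch: cost of a 16-byte-aligned <16 x i8> load.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;
InstructionCost byteVectorLoadCost(const TargetTransformInfo &TTI,
                                   LLVMContext &Ctx) {
  auto *VTy = FixedVectorType::get(Type::getInt8Ty(Ctx), 16);
  return TTI.getMemoryOpCost(Instruction::Load, VTy, Align(16),
                             /*AddressSpace=*/0,
                             TargetTransformInfo::TCK_RecipThroughput);
}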
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:897
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:851
llvm::BasicTTIImpl::BasicTTIImpl
BasicTTIImpl(const TargetMachine *TM, const Function &F)
Definition: BasicTargetTransformInfo.cpp:32
llvm::ISD::POST_DEC
@ POST_DEC
Definition: ISDOpcodes.h:1373
llvm::FloatStyle::Exponent
@ Exponent
llvm::ElementCount::isVector
bool isVector() const
One or more elements.
Definition: TypeSize.h:416
llvm::TargetTransformInfoImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
Definition: TargetTransformInfoImpl.h:489
llvm::TargetSubtargetInfo::useAA
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
Definition: TargetSubtargetInfo.cpp:56
llvm::MCSubtargetInfo::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Return the minimum stride necessary to trigger software prefetching.
Definition: MCSubtargetInfo.cpp:363
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
getType
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
Definition: M68kELFObjectWriter.cpp:48
llvm::BasicTTIImplBase::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Definition: BasicTTIImpl.h:674
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2127
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:530
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:957
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1105
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1373
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1597
DataLayout.h
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:49
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetMachine::getPredicatedAddrSpace
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Definition: TargetMachine.h:333
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:701
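This pairs naturally with APInt::getAllOnes above when every lane is live; a sketch through the public wrapper (helper name assumed):
// Sketch: overhead of scalarizing every lane of a fixed vector, counting
// both the inserts that rebuild it and the extracts that consume it.
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
using namespace llvm;
InstructionCost fullScalarizeCost(const TargetTransformInfo &TTI,
                                  FixedVectorType *VTy) {
  APInt Demanded = APInt::getAllOnes(VTy->getNumElements());
  return TTI.getScalarizationOverhead(VTy, Demanded,
                                      /*Insert=*/true, /*Extract=*/true);
}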
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::BasicTTIImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, LoopVectorizationLegality *LVL, InterleavedAccessInfo *IAI)
Definition: BasicTTIImpl.h:605
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2553
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
TargetSubtargetInfo.h
trunc
Definition: README-FPStack.txt:63
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:922
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1774
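This mapping is how the generic cost code asks legality questions about IR; a sketch combining it with getValueType and isOperationLegalOrCustomOrPromote (helper name assumed):
// Sketch: can the target handle an integer add of type Ty without
// expanding it into a longer code sequence?
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
bool addIsDirectlySupported(const TargetLoweringBase &TLI,
                            const DataLayout &DL, Type *Ty) {
  int ISDOpc = TLI.InstructionOpcodeToISD(Instruction::Add); // ISD::ADD
  EVT VT = TLI.getValueType(DL, Ty);
  return TLI.isOperationLegalOrCustomOrPromote(ISDOpc, VT);
}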
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:229
llvm::TargetTransformInfoImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:578
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:923
llvm::BasicTTIImplBase::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:590
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1401
llvm::VectorType::getHalfElementsVectorType
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
Definition: DerivedTypes.h:493
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:905
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract)
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
Definition: BasicTTIImpl.h:728
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:890
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:60
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1404
llvm::BasicTTIImplBase::useAA
bool useAA() const
Definition: BasicTTIImpl.h:384
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::RISCVISD::LA
@ LA
Definition: RISCVISelLowering.h:314
llvm::TargetLoweringBase::TypeLegal
@ TypeLegal
Definition: TargetLowering.h:205
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1402
llvm::TargetLoweringBase::isFreeAddrSpaceCast
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
Definition: TargetLoweringBase.cpp:944
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:913
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:346
llvm::BasicTTIImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: BasicTTIImpl.h:598
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2142
llvm::ShuffleVectorInst::isSpliceMask
static bool isSpliceMask(ArrayRef< int > Mask, int &Index)
Return true if this shuffle mask is a splice mask, concatenating the two inputs together and then ext...
Definition: Instructions.cpp:2299
Constant.h
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2554
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1302
llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition: TargetLowering.h:2926
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:944
llvm::BasicTTIImpl
Concrete BasicTTIImpl that can be used if no further customization is needed.
Definition: BasicTTIImpl.h:2393
llvm::KnownBits
Definition: KnownBits.h:23
llvm::BasicTTIImplBase::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
Definition: BasicTTIImpl.h:2149
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2556
llvm::TargetLoweringBase::isIndexedStoreLegal
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
Definition: TargetLowering.h:1372
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:481
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:916
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:924
llvm::ShuffleVectorInst::isSelectMask
static bool isSelectMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition: Instructions.cpp:2252
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:202
llvm::TargetTransformInfo::getOperandInfo
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:725
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:915
ISDOpcodes.h
llvm::TypeSize
Definition: TypeSize.h:435
Casting.h
llvm::BasicTTIImplBase::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1269
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:199
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1280
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::BasicTTIImplBase::isProfitableToHoist
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:380
llvm::BasicTTIImplBase::isLegalAddImmediate
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:300
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
Definition: BasicTTIImpl.h:768
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:225
llvm::Function::isTargetIntrinsic
bool isTargetIntrinsic() const
isTargetIntrinsic - Returns true if this function is an intrinsic and the intrinsic is specific to a ...
Definition: Function.cpp:747
llvm::ARCCC::Z
@ Z
Definition: ARCInfo.h:41
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:794
llvm::BasicTTIImplBase::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Definition: BasicTTIImpl.h:277
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:967
llvm::PredicationStyle
PredicationStyle
Definition: TargetTransformInfo.h:165
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfoImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: TargetTransformInfoImpl.h:175
llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1205
llvm::TargetTransformInfoImplBase::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:37
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:96
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::BasicTTIImplBase::getExtendedReductionCost
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2351
llvm::BasicTTIImplBase::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
Definition: BasicTTIImpl.h:2288
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: TargetLowering.h:2694
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::BasicTTIImplBase::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1276
Instructions.h
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:150
llvm::TargetLoweringBase::areJTsAllowed
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
Definition: TargetLowering.h:1223
SmallVector.h
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:962
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:197
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:742
N
#define N
llvm::BasicTTIImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1429
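A type-based sketch, building the IntrinsicCostAttributes by hand rather than from an existing call site (helper name assumed):
// Sketch: cost of llvm.sqrt.f32 at reciprocal throughput.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
using namespace llvm;
InstructionCost sqrtF32Cost(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
  Type *FloatTy = Type::getFloatTy(Ctx);
  IntrinsicCostAttributes Attrs(Intrinsic::sqrt, FloatTy, {FloatTy});
  return TTI.getIntrinsicInstrCost(Attrs,
                                   TargetTransformInfo::TCK_RecipThroughput);
}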
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:704
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::BasicTTIImplBase::getNumberOfParts
unsigned getNumberOfParts(Type *Tp)
Definition: BasicTTIImpl.h:2155
TargetTransformInfo.h
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null...
Definition: TargetLowering.h:2552
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2783
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1405
llvm::SmallVectorImpl< int >
llvm::TargetLoweringBase::getNumRegisters
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT, Optional< MVT > RegisterVT=None) const
Return the number of registers that this ValueType will eventually require.
Definition: TargetLowering.h:1579
llvm::BasicTTIImplBase::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:286
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1151
DerivedTypes.h
TM
LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:436
llvm::BasicTTIImplBase::getTypeBasedIntrinsicInstrCost
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:1650
BB
Definition: README.txt:39
llvm::BasicTTIImplBase::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:363
llvm::MCSubtargetInfo::getCacheLineSize
virtual Optional< unsigned > getCacheLineSize(unsigned Level) const
Return the target cache line size in bytes at a given level.
Definition: MCSubtargetInfo.cpp:347
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:244
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3278
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::TargetTransformInfoImplBase::emitGetActiveLaneMask
PredicationStyle emitGetActiveLaneMask() const
Definition: TargetTransformInfoImpl.h:171
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1490
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:158
llvm::MCSubtargetInfo::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(unsigned Level) const
Return the cache associativity for the given level of cache.
Definition: MCSubtargetInfo.cpp:343
llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition: TargetLowering.h:211
llvm::TargetTransformInfoImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:637
llvm::TargetTransformInfo::OperandValueInfo::isConstant
bool isConstant() const
Definition: TargetTransformInfo.h:925
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:925
llvm::BasicTTIImplBase::getOrderedReductionCost
InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate the cost of performing strict (in-order) reductions, which involves doing a sequence...
Definition: BasicTTIImpl.h:2261
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1288
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:132
llvm::BasicTTIImplBase::hasBranchDivergence
bool hasBranchDivergence()
Definition: BasicTTIImpl.h:264
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:919
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:343
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:218
llvm::BasicTTIImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I)
Definition: BasicTTIImpl.h:359
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:155
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:668
llvm::ShuffleVectorInst::isTransposeMask
static bool isTransposeMask(ArrayRef< int > Mask)
Return true if this shuffle mask is a transpose mask.
Definition: Instructions.cpp:2265
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:892
llvm::Triple::aarch64
@ aarch64
Definition: Triple.h:51
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2132
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::MCSubtargetInfo::shouldPrefetchAddressSpace
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: MCSubtargetInfo.cpp:370
llvm::codeview::PublicSymFlags::Function
@ Function
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition: TargetLowering.h:986
llvm::BasicTTIImplBase::shouldPrefetchAddressSpace
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
Definition: BasicTTIImpl.h:682
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722