//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>

namespace llvm {

class Function;
class GlobalValue;
class LLVMContext;
class ScalarEvolution;
class SCEV;
class TargetMachine;

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
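///
/// A minimal sketch of how a target would derive from this class (the
/// MyTarget* names below are illustrative placeholders, not LLVM APIs):
/// \code
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     const MyTargetSubtarget *ST;
///     const MyTargetLowering *TLI;
///
///   public:
///     // Expose the subtarget and target lowering so the CRTP base can
///     // query them without duplicating their storage here.
///     const MyTargetSubtarget *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };
/// \endcode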
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
  using TTI = TargetTransformInfo;

  /// Helper function to access this as a T.
  T *thisT() { return static_cast<T *>(this); }

  /// Estimate a cost of Broadcast as an extract and sequence of insert
  /// operations.
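  ///
  /// For example, broadcasting the first element of a <4 x i32> vector is
  /// modeled as one extractelement (index 0) plus four insertelement
  /// operations, one per result element.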
  InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) {
    InstructionCost Cost = 0;
    // Broadcast cost is equal to the cost of extracting the zero'th element
    // plus the cost of inserting it into every element of the result vector.
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);

    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of shuffle as a sequence of extract and insert
  /// operations.
  InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy) {
    InstructionCost Cost = 0;
    // Shuffle cost is equal to the cost of extracting elements from the
    // arguments plus the cost of inserting them into the result vector.

    // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
    // from index 0 of the first vector, index 1 of the second vector, index 2
    // of the first vector, and finally index 3 of the second vector, and
    // insert them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector extraction as a sequence of extract and
  /// insert operations.
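  ///
  /// For example, extracting a <2 x float> subvector at index 2 from a
  /// <4 x float> source is modeled as two extracts (indices 2 and 3 of the
  /// source) plus two inserts (indices 0 and 1 of the result).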
  InstructionCost getExtractSubvectorOverhead(VectorType *VTy, int Index,
                                              FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only extract subvectors from vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_ExtractSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector extraction cost is equal to the cost of extracting elements
    // from the source type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                          i + Index);
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector insertion as a sequence of extract and
  /// insert operations.
  InstructionCost getInsertSubvectorOverhead(VectorType *VTy, int Index,
                                             FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only insert subvectors into vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_InsertSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector insertion cost is equal to the cost of extracting elements
    // from the subvector type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                          i + Index);
    }
    return Cost;
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }

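  /// Estimate the cost of a masked load/store, gather, or scatter on a target
  /// with no native support: one scalar memory op per element, plus (for
  /// gather/scatter) an address extract per element, plus packing/unpacking
  /// of the value vector, plus (for variable masks) a per-element
  /// extract/branch/PHI sequence. For example, a masked gather of
  /// <4 x float> with a variable mask is modeled as 4 scalar loads, 4 address
  /// extracts, 4 result inserts, and 4 mask-extract/branch/PHI triples.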
  InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                              Align Alignment,
                                              bool VariableMask,
                                              bool IsGatherScatter,
                                              TTI::TargetCostKind CostKind) {
    auto *VT = cast<FixedVectorType>(DataTy);
    // Assume the target does not have support for gather/scatter operations
    // and provide a rough estimate.
    //
    // First, compute the cost of the individual memory operations.
    InstructionCost AddrExtractCost =
        IsGatherScatter
            ? getVectorInstrCost(Instruction::ExtractElement,
                                 FixedVectorType::get(
                                     PointerType::get(VT->getElementType(), 0),
                                     VT->getNumElements()),
                                 -1)
            : 0;
    InstructionCost LoadCost =
        VT->getNumElements() *
        (AddrExtractCost +
         getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));

    // Next, compute the cost of packing the result in a vector.
    InstructionCost PackingCost = getScalarizationOverhead(
        VT, Opcode != Instruction::Store, Opcode == Instruction::Store);

    InstructionCost ConditionalCost = 0;
    if (VariableMask) {
      // Compute the cost of conditionally executing the memory operations with
      // variable masks. This includes extracting the individual conditions,
      // plus the branches and PHIs needed to combine the results.
      // NOTE: Estimating the cost of conditionally executing the memory
      // operations accurately is quite difficult and the current solution
      // provides a very rough estimate only.
      ConditionalCost =
          VT->getNumElements() *
          (getVectorInstrCost(
               Instruction::ExtractElement,
               FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
                                    VT->getNumElements()),
               -1) +
           getCFInstrCost(Instruction::Br, CostKind) +
           getCFInstrCost(Instruction::PHI, CostKind));
    }

    return LoadCost + PackingCost + ConditionalCost;
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
  virtual ~BasicTTIImplBase() = default;

  using TargetTransformInfoImplBase::DL;

public:
  /// \name Scalar TTI Implementations
  /// @{
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }

  bool hasBranchDivergence() { return false; }

  bool useGPUDivergenceAnalysis() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  bool isNumRegsMajorCostOfLSR() {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }

  bool isProfitableLSRChainElement(Instruction *I) {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  InstructionCost getRegUsageForType(Type *Ty) {
    InstructionCost Val = getTLI()->getTypeLegalizationCost(DL, Ty).first;
    assert(Val >= 0 && "Negative cost!");
    return Val;
  }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// numbers found in lowering. This function ignores switches that are
    /// lowered with a mix of jump table / bit test / BTree. It was initially
    /// intended to be used when estimating the cost of a switch in the inline
    /// cost heuristic, but it's a generic cost model to be used in other
    /// places (e.g., in loop unrolling).
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if both a jump table and bit test are not allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether a range of clusters is dense enough for a jump table
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool shouldBuildRelLookupTables() const {
    const TargetMachine &TM = getTLI()->getTargetMachine();
    // If non-PIC mode, do not generate a relative lookup table.
    if (!TM.isPositionIndependent())
      return false;

    /// Relative lookup table entries consist of 32-bit offsets.
    /// Do not generate relative lookup tables for large code models
    /// in 64-bit architectures where 32-bit offsets might not be enough.
    if (TM.getCodeModel() == CodeModel::Medium ||
        TM.getCodeModel() == CodeModel::Large)
      return false;

    Triple TargetTriple = TM.getTargetTriple();
    if (!TargetTriple.isArch64Bit())
      return false;

    // TODO: Triggers issues on aarch64 on darwin, so temporarily disable it
    // there.
    if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin())
      return false;

    return true;
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }

  InstructionCost getFPOpCost(Type *Ty) {
    // Check whether FADD is available, as a proxy for floating-point in
    // general.
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
      return TargetTransformInfo::TCC_Basic;
    return TargetTransformInfo::TCC_Expensive;
  }

  unsigned getInliningThresholdMultiplier() { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }

  int getInlinerVectorBonusPercent() { return 150; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (BasicBlock *BB : L->blocks()) {
      for (Instruction &I : *BB) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
          if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
            if (!thisT()->isLoweredToCall(F))
              continue;
          }

          if (ORE) {
            ORE->emit([&]() {
              return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
                                        L->getHeader())
                     << "advising against unrolling the loop because it "
                        "contains a "
                     << ore::NV("Call", &I);
            });
          }
          return;
        }
      }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using trip count upper bound to unroll loops.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set number of instructions optimized when "back edge"
    // becomes "fall through" to default value of 2.
    UP.BEInsns = 2;
  }

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) {
    PP.PeelCount = 0;
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) {
    return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }

  bool emitGetActiveLaneMask() {
    return BaseT::emitGetActiveLaneMask();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

  Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedMask,
                                                     KnownBits &Known,
                                                     bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) {
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }

  InstructionCost getInstructionLatency(const Instruction *I) {
    if (isa<LoadInst>(I))
      return getST()->getSchedModel().DefaultLoadLatency;

    return BaseT::getInstructionLatency(I);
  }

  virtual Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return Optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    Optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
      return TargetResult;

    return BaseT::getCacheAssociativity(Level);
  }

  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }

  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }

  virtual bool enableWritePrefetching() const {
    return getST()->enableWritePrefetching();
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  Optional<unsigned> getMaxVScale() const { return None; }

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
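  ///
  /// For example, for a <4 x i32> with all elements demanded and both Insert
  /// and Extract set, this sums the cost of four insertelement and four
  /// extractelement operations, i.e. the cost of building and taking apart a
  /// vector such as:
  /// \code
  ///   %v0 = insertelement <4 x i32> undef, i32 %a, i32 0
  ///   ...
  ///   %x0 = extractelement <4 x i32> %v, i32 0
  /// \endcode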
  InstructionCost getScalarizationOverhead(VectorType *InTy,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) {
    /// FIXME: a bitfield is not a reasonable abstraction for talking about
    /// which elements are needed from a scalable vector
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           "Vector size mismatch");

    InstructionCost Cost = 0;

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }

  /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
  InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
                                           bool Extract) {
    auto *Ty = cast<FixedVectorType>(InTy);

    APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  }

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                   ArrayRef<Type *> Tys) {
    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");

    InstructionCost Cost = 0;
    SmallPtrSet<const Value*, 4> UniqueOperands;
    for (int I = 0, E = Args.size(); I != E; I++) {
      // Disregard things like metadata arguments.
      const Value *A = Args[I];
      Type *Ty = Tys[I];
      if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
          !Ty->isPtrOrPtrVectorTy())
        continue;

      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
          Cost += getScalarizationOverhead(VecTy, false, true);
      }
    }

    return Cost;
  }

  /// Estimate the overhead of scalarizing the inputs and outputs of an
  /// instruction, with return type RetTy and arguments Args of type Tys. If
  /// Args are unknown (empty), then the cost associated with one argument is
  /// added as a heuristic.
  InstructionCost getScalarizationOverhead(VectorType *RetTy,
                                           ArrayRef<const Value *> Args,
                                           ArrayRef<Type *> Tys) {
    InstructionCost Cost = getScalarizationOverhead(RetTy, true, false);
    if (!Args.empty())
      Cost += getOperandsScalarizationOverhead(Args, Tys);
    else
      // When no information on arguments is provided, we add the cost
      // associated with one argument as a heuristic.
      Cost += getScalarizationOverhead(RetTy, false, true);

    return Cost;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle more cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                           Opd1Info, Opd2Info,
                                           Opd1PropInfo, Opd2PropInfo,
                                           Args, CxtI);

    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    InstructionCost OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is twice
      // as expensive.
      return LT.first * 2 * OpCost;
    }

    // An 'Expand' of URem and SRem is special because it may default
    // to expanding the operation into a sequence of sub-operations
    // i.e. X % Y -> X-(X/Y)*Y.
    if (ISD == ISD::UREM || ISD == ISD::SREM) {
      bool IsSigned = ISD == ISD::SREM;
      if (TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
                                        LT.second) ||
          TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV,
                                        LT.second)) {
        unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
        InstructionCost DivCost = thisT()->getArithmeticInstrCost(
            DivOpc, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo,
            Opd2PropInfo);
        InstructionCost MulCost =
            thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
        InstructionCost SubCost =
            thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
        return DivCost + MulCost + SubCost;
      }
    }

    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    // Else, assume that we need to scalarize this op.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      InstructionCost Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      SmallVector<Type *> Tys(Args.size(), Ty);
      return getScalarizationOverhead(VTy, Args, Tys) +
             VTy->getNumElements() * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }

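  /// Try to refine a generic shuffle kind using the concrete mask, so that a
  /// cheaper, more specific cost can be returned. For example (illustrative),
  /// a four-element single-source mask of <3,2,1,0> is recognized as
  /// TTI::SK_Reverse, and <0,0,0,0> as TTI::SK_Broadcast.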
  TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
                                              ArrayRef<int> Mask) const {
    int Limit = Mask.size() * 2;
    if (Mask.empty() ||
        // Extra check required by isSingleSourceMaskImpl function (called by
        // ShuffleVectorInst::isSingleSourceMask).
        any_of(Mask, [Limit](int I) { return I >= Limit; }))
      return Kind;
    switch (Kind) {
    case TTI::SK_PermuteSingleSrc:
      if (ShuffleVectorInst::isReverseMask(Mask))
        return TTI::SK_Reverse;
      if (ShuffleVectorInst::isZeroEltSplatMask(Mask))
        return TTI::SK_Broadcast;
      break;
    case TTI::SK_PermuteTwoSrc:
      if (ShuffleVectorInst::isSelectMask(Mask))
        return TTI::SK_Select;
      if (ShuffleVectorInst::isTransposeMask(Mask))
        return TTI::SK_Transpose;
      break;
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Broadcast:
    case TTI::SK_Transpose:
    case TTI::SK_InsertSubvector:
    case TTI::SK_ExtractSubvector:
    case TTI::SK_Splice:
      break;
    }
    return Kind;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp) {

    switch (improveShuffleKindFromMask(Kind, Mask)) {
    case TTI::SK_Broadcast:
      return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
    case TTI::SK_Select:
    case TTI::SK_Splice:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc:
      return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, Index,
                                        cast<FixedVectorType>(SubTp));
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr) {
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT =
        TLI->getTypeLegalizationCost(DL, Src);
    std::pair<InstructionCost, MVT> DstLT =
        TLI->getTypeLegalizationCost(DL, Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::BitCast:
      // Bitcast between types that are legalized to the same type are free and
      // assume int to/from ptr of the same size is also free.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;

      // If this is a zext/sext of a load, return 0 if the corresponding
      // extending load exists on target and the result type is legal.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
            ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
        if (DstLT.first == SrcLT.first &&
            TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
          return 0;
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // Just check the op cost. If the operation is legal then assume it costs
      // 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are legalizing by splitting, query the concrete TTI for the cost
      // of casting the original vector twice. We also need to factor in the
      // cost of the split itself. Count that as 1, to be consistent with
      // TLI->getTypeLegalizationCost().
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // Scalarization cost is Invalid, can't assume any num elements.
      if (isa<ScalableVectorType>(DstVTy))
        return InstructionCost::getInvalid();

      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcast between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
    }

    llvm_unreachable("Unhandled cast");
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index) {
    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       Index) +
           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                     TTI::CastContextHint::None,
                                     TTI::TCK_RecipThroughput);
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) {
    return BaseT::getCFInstrCost(Opcode, CostKind, I);
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle other cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       I);

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<InstructionCost, MVT> LT =
        TLI->getTypeLegalizationCost(DL, ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the cast is scalarized.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      InstructionCost Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) {
    std::pair<InstructionCost, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());

    return LT.first;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) {
    assert(!Src->isVoidTy() && "Invalid type");
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    std::pair<InstructionCost, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Src);

    // Assuming that all loads of legal types cost 1.
    InstructionCost Cost = LT.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    if (Src->isVectorTy() &&
        // In practice it's not currently possible to have a change in lane
        // length for extending loads or truncating stores so both types should
        // have the same scalable property.
        TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(),
                            LT.second.getSizeInBits())) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, then this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src);
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(cast<VectorType>(Src),
                                         Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
                                       CostKind);
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
                                       true, CostKind);
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {
    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);

    // Firstly, the cost of the load/store operation.
    InstructionCost Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalize the vector type, and get the legalized and unlegalized type
    // sizes.
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

    // Scale the cost of the memory operation by the fraction of legalized
    // instructions that will actually be used. We shouldn't account for the
    // cost of dead instructions since they will be removed.
    //
    // E.g., An interleaved load of factor 8:
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
    //       %v0 = shufflevector %vec, undef, <0, 8>
    //
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
    // type). The other loads are unused.
    //
    // TODO: Note that legalization can turn masked loads/stores into unmasked
    // (legalized) loads/stores. This can be reflected in the cost.
    if (Cost.isValid() && VecTySize > VecTyLTSize) {
      // The number of loads of a legal type it will take to represent a load
      // of the unlegalized vector type.
      unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);

      // The number of elements of the unlegalized type that correspond to a
      // single legal instruction.
      unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);

      // Determine which legal instructions will be used.
      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

      // Scale the cost of the load by the fraction of legal instructions that
      // will be used.
      Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(),
                        NumLegalInsts);
    }

    // Then add the cost of the interleave operation.
    assert(Indices.size() <= Factor &&
           "Interleaved memory op has too many members");

    APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
    for (unsigned Index : Indices) {
      assert(Index < Factor && "Invalid index for interleaved memory op");
      for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
        DemandedLoadStoreElts.setBit(Index + Elm * Factor);
    }

    if (Opcode == Instruction::Load) {
      // The interleave cost is similar to extracting sub vectors' elements
      // from the wide vector, and inserting them into sub vectors.
      //
      // E.g. An interleaved load of factor 2 (with one member of index 0):
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>   ; Index 0
      // The cost is estimated as extract elements at 0, 2, 4, 6 from the
      // <8 x i32> vector and insert them into a <4 x i32> vector.
      InstructionCost InsSubCost =
          getScalarizationOverhead(SubVT, /*Insert*/ true, /*Extract*/ false);
      Cost += Indices.size() * InsSubCost;
      Cost +=
          thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                            /*Insert*/ false, /*Extract*/ true);
    } else {
      // The interleave cost is extract elements from sub vectors, and
      // insert them into the wide vector.
      //
      // E.g. An interleaved store of factor 3 with 2 members at indices 0,1:
      // (using VF=4):
      //    %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
      //    %gaps.mask = <true, true, false, true, true, false,
      //                  true, true, false, true, true, false>
      //    call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
      //                           i32 Align, <12 x i1> %gaps.mask
      // The cost is estimated as extract all elements (of actual members,
      // excluding gaps) from both <4 x i32> vectors and insert into the <12 x
      // i32> vector.
      InstructionCost ExtSubCost =
          getScalarizationOverhead(SubVT, /*Insert*/ false, /*Extract*/ true);
      Cost += ExtSubCost * Indices.size();
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                                /*Insert*/ true,
                                                /*Extract*/ false);
    }

    if (!UseMaskForCond)
      return Cost;

    Type *I8Type = Type::getInt8Ty(VT->getContext());
    auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
    SubVT = FixedVectorType::get(I8Type, NumSubElts);

    // The Mask shuffling cost is extract all the elements of the Mask
    // and insert each of them Factor times into the wide vector:
    //
    // E.g. an interleaved group with factor 3:
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
    // The cost is estimated as extract all mask elements from the <8xi1> mask
    // vector and insert them factor times into the <24xi1> shuffled mask
    // vector.
    Cost += getScalarizationOverhead(SubVT, /*Insert*/ false, /*Extract*/ true);
    Cost +=
        getScalarizationOverhead(MaskVT, /*Insert*/ true, /*Extract*/ false);

    // The Gaps mask is invariant and created outside the loop, therefore the
    // cost of creating it is not accounted for here. However if we have both
    // a MaskForGaps and some other mask that guards the execution of the
    // memory access, we need to account for the cost of And-ing the two masks
    // inside the loop.
    if (UseMaskForGaps)
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                              CostKind);

    return Cost;
  }

  /// Get intrinsic cost based on arguments.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) {
    // Check for generically free intrinsics.
    if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
      return 0;

    // Assume that target intrinsics are cheap.
    Intrinsic::ID IID = ICA.getID();
    if (Function::isTargetIntrinsic(IID))
      return TargetTransformInfo::TCC_Basic;

    if (ICA.isTypeBasedOnly())
      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

    Type *RetTy = ICA.getReturnType();

    ElementCount RetVF =
        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                             : ElementCount::getFixed(1));
    const IntrinsicInst *I = ICA.getInst();
    const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
    FastMathFlags FMF = ICA.getFlags();
    switch (IID) {
    default:
      break;

    case Intrinsic::cttz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::ctlz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());

    case Intrinsic::masked_scatter: {
      const Value *Mask = Args[3];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Store,
                                             ICA.getArgTypes()[0], Args[1],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_stepvector: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      // The cost of materialising a constant integer vector.
      return TargetTransformInfo::TCC_Basic;
    }
    case Intrinsic::experimental_vector_extract: {
      // FIXME: Handle case where a scalable vector is extracted from a scalable
      // vector
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::experimental_vector_insert: {
      // FIXME: Handle case where a scalable vector is inserted into a scalable
      // vector
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), None,
          Index, cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::experimental_vector_reverse: {
      return thisT()->getShuffleCost(TTI::SK_Reverse,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     0, cast<VectorType>(RetTy));
    }
    case Intrinsic::experimental_vector_splice: {
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_Splice,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
                                                              : TTI::OP_None;
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
      InstructionCost Cost = 0;
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
      // Non-constant shift amounts require a modulo.
      if (OpKindZ != TTI::OK_UniformConstantValue &&
          OpKindZ != TTI::OK_NonUniformConstantValue)
        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
                                                CostKind, OpKindZ, OpKindBW,
                                                OpPropsZ, OpPropsBW);
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
      if (X != Y) {
        Type *CondTy = RetTy->getWithNewBitWidth(1);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                        CmpInst::ICMP_EQ, CostKind);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                        CmpInst::ICMP_EQ, CostKind);
      }
      return Cost;
    }
    }

    // Assume that we need to scalarize this intrinsic.
    // Compute the scalarization overhead based on Args for a vector
    // intrinsic.
    InstructionCost ScalarizationCost = InstructionCost::getInvalid();
    if (RetVF.isVector() && !RetVF.isScalable()) {
      ScalarizationCost = 0;
      if (!RetTy->isVoidTy())
        ScalarizationCost +=
            getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
      ScalarizationCost +=
          getOperandsScalarizationOverhead(Args, ICA.getArgTypes());
    }

    IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
                                  ScalarizationCost);
    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
  }

  /// Get intrinsic cost based on argument types.
  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
  /// cost of scalarizing the arguments and the return value will be computed
  /// based on types.
  InstructionCost
  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) {
    Intrinsic::ID IID = ICA.getID();
    Type *RetTy = ICA.getReturnType();
    const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
    FastMathFlags FMF = ICA.getFlags();
    InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost();
    bool SkipScalarizationCost = ICA.skipScalarizationCost();

    VectorType *VecOpTy = nullptr;
    if (!Tys.empty()) {
      // The vector reduction operand is operand 0 except for fadd/fmul.
      // Their operand 0 is a scalar start value, so the vector op is operand 1.
      unsigned VecTyIndex = 0;
      if (IID == Intrinsic::vector_reduce_fadd ||
          IID == Intrinsic::vector_reduce_fmul)
        VecTyIndex = 1;
      assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
      VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
    }

    // Library call cost - other than size, make it expensive.
    unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
    SmallVector<unsigned, 2> ISDs;
    switch (IID) {
    default: {
      // Scalable vectors cannot be scalarized, so return Invalid.
      if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
            return isa<ScalableVectorType>(Ty);
          }))
        return InstructionCost::getInvalid();

      // Assume that we need to scalarize this intrinsic.
      InstructionCost ScalarizationCost =
          SkipScalarizationCost ? ScalarizationCostPassed : 0;
      unsigned ScalarCalls = 1;
      Type *ScalarRetTy = RetTy;
      if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
        if (!SkipScalarizationCost)
          ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
        ScalarCalls = std::max(ScalarCalls,
                               cast<FixedVectorType>(RetVTy)->getNumElements());
        ScalarRetTy = RetTy->getScalarType();
      }
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (auto *VTy = dyn_cast<VectorType>(Ty)) {
          if (!SkipScalarizationCost)
            ScalarizationCost += getScalarizationOverhead(VTy, false, true);
          ScalarCalls = std::max(ScalarCalls,
                                 cast<FixedVectorType>(VTy)->getNumElements());
          Ty = Ty->getScalarType();
        }
        ScalarTys.push_back(Ty);
      }
      if (ScalarCalls == 1)
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.

      IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
      InstructionCost ScalarCost =
          thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);

      return ScalarCalls * ScalarCost + ScalarizationCost;
    }
    // Look for intrinsics that can be lowered directly or turned into a scalar
    // intrinsic call.
    case Intrinsic::sqrt:
      ISDs.push_back(ISD::FSQRT);
      break;
    case Intrinsic::sin:
      ISDs.push_back(ISD::FSIN);
      break;
    case Intrinsic::cos:
      ISDs.push_back(ISD::FCOS);
      break;
    case Intrinsic::exp:
      ISDs.push_back(ISD::FEXP);
      break;
    case Intrinsic::exp2:
      ISDs.push_back(ISD::FEXP2);
      break;
    case Intrinsic::log:
      ISDs.push_back(ISD::FLOG);
      break;
    case Intrinsic::log10:
      ISDs.push_back(ISD::FLOG10);
      break;
    case Intrinsic::log2:
      ISDs.push_back(ISD::FLOG2);
      break;
    case Intrinsic::fabs:
      ISDs.push_back(ISD::FABS);
      break;
    case Intrinsic::canonicalize:
      ISDs.push_back(ISD::FCANONICALIZE);
      break;
    case Intrinsic::minnum:
      ISDs.push_back(ISD::FMINNUM);
      break;
    case Intrinsic::maxnum:
      ISDs.push_back(ISD::FMAXNUM);
      break;
    case Intrinsic::minimum:
      ISDs.push_back(ISD::FMINIMUM);
      break;
    case Intrinsic::maximum:
      ISDs.push_back(ISD::FMAXIMUM);
      break;
    case Intrinsic::copysign:
      ISDs.push_back(ISD::FCOPYSIGN);
      break;
    case Intrinsic::floor:
      ISDs.push_back(ISD::FFLOOR);
      break;
    case Intrinsic::ceil:
      ISDs.push_back(ISD::FCEIL);
      break;
    case Intrinsic::trunc:
      ISDs.push_back(ISD::FTRUNC);
      break;
    case Intrinsic::nearbyint:
      ISDs.push_back(ISD::FNEARBYINT);
      break;
    case Intrinsic::rint:
      ISDs.push_back(ISD::FRINT);
      break;
    case Intrinsic::round:
      ISDs.push_back(ISD::FROUND);
      break;
    case Intrinsic::roundeven:
      ISDs.push_back(ISD::FROUNDEVEN);
      break;
    case Intrinsic::pow:
      ISDs.push_back(ISD::FPOW);
      break;
    case Intrinsic::fma:
      ISDs.push_back(ISD::FMA);
      break;
    case Intrinsic::fmuladd:
      ISDs.push_back(ISD::FMA);
      break;
    case Intrinsic::experimental_constrained_fmuladd:
      ISDs.push_back(ISD::STRICT_FMA);
      break;
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
      return 0;
    case Intrinsic::masked_store: {
      Type *Ty = Tys[0];
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::masked_load: {
      Type *Ty = RetTy;
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::vector_reduce_add:
      return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_mul:
      return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_and:
      return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_or:
      return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, None,
                                                 CostKind);
    case Intrinsic::vector_reduce_xor:
      return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_fadd:
      return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
                                                 FMF, CostKind);
    case Intrinsic::vector_reduce_fmul:
      return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
                                                 FMF, CostKind);
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
      return thisT()->getMinMaxReductionCost(
          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
          /*IsUnsigned=*/false, CostKind);
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin:
      return thisT()->getMinMaxReductionCost(
          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
          /*IsUnsigned=*/true, CostKind);
1698  case Intrinsic::abs: {
1699  // abs(X) = select(icmp(X,0),X,sub(0,X))
1700  Type *CondTy = RetTy->getWithNewBitWidth(1);
1701  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1702  InstructionCost Cost = 0;
1703  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1704  Pred, CostKind);
1705  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1706  Pred, CostKind);
1707  // TODO: Should we add an OperandValueProperties::OP_Zero property?
1708  Cost += thisT()->getArithmeticInstrCost(
1709  BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
1710  return Cost;
1711  }
1712  case Intrinsic::smax:
1713  case Intrinsic::smin:
1714  case Intrinsic::umax:
1715  case Intrinsic::umin: {
1716  // minmax(X,Y) = select(icmp(X,Y),X,Y)
1717  Type *CondTy = RetTy->getWithNewBitWidth(1);
1718  bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
1719  CmpInst::Predicate Pred =
1720  IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
1721  InstructionCost Cost = 0;
1722  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1723  Pred, CostKind);
1724  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1725  Pred, CostKind);
1726  return Cost;
1727  }
1728  case Intrinsic::sadd_sat:
1729  case Intrinsic::ssub_sat: {
1730  Type *CondTy = RetTy->getWithNewBitWidth(1);
1731 
1732  Type *OpTy = StructType::create({RetTy, CondTy});
1733  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1734  ? Intrinsic::sadd_with_overflow
1735  : Intrinsic::ssub_with_overflow;
1736  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1737 
1738  // SatMax -> Overflow && SumDiff < 0
1739  // SatMin -> Overflow && SumDiff >= 0
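// As a sketch, the expansion being costed is roughly:
//   {SumDiff, Overflow} = sadd/ssub.with.overflow(X, Y)
//   Sat = select(SumDiff < 0, SatMax, SatMin)
//   Res = select(Overflow, Sat, SumDiff)
// i.e. one overflow intrinsic, one icmp and two selects, as below.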
1740  InstructionCost Cost = 0;
1741  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1742  nullptr, ScalarizationCostPassed);
1743  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1744  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1745  Pred, CostKind);
1746  Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1747  CondTy, Pred, CostKind);
1748  return Cost;
1749  }
1750  case Intrinsic::uadd_sat:
1751  case Intrinsic::usub_sat: {
1752  Type *CondTy = RetTy->getWithNewBitWidth(1);
1753 
1754  Type *OpTy = StructType::create({RetTy, CondTy});
1755  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1756  ? Intrinsic::uadd_with_overflow
1757  : Intrinsic::usub_with_overflow;
1758 
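// No sign test is needed for the unsigned forms: uadd.sat saturates to
// all-ones and usub.sat to zero on overflow, so the expansion is just
// the overflow intrinsic plus a single select.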
1759  InstructionCost Cost = 0;
1760  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1761  nullptr, ScalarizationCostPassed);
1762  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1763  Cost +=
1764  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1765  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1766  return Cost;
1767  }
1768  case Intrinsic::smul_fix:
1769  case Intrinsic::umul_fix: {
1770  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1771  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1772 
1773  unsigned ExtOp =
1774  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1775  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1776 
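// The fixed-point multiply is costed as: extend both operands, multiply
// in the doubled width, truncate the low and high halves back, and
// recombine them with a shift pair plus an or.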
1777  InstructionCost Cost = 0;
1778  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
1779  Cost +=
1780  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1781  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1782  CCH, CostKind);
1783  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1784  CostKind, TTI::OK_AnyValue,
1785  TTI::OK_UniformConstantValue);
1786  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1787  TTI::OK_AnyValue,
1788  TTI::OK_UniformConstantValue);
1789  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
1790  return Cost;
1791  }
1792  case Intrinsic::sadd_with_overflow:
1793  case Intrinsic::ssub_with_overflow: {
1794  Type *SumTy = RetTy->getContainedType(0);
1795  Type *OverflowTy = RetTy->getContainedType(1);
1796  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1797  ? BinaryOperator::Add
1798  : BinaryOperator::Sub;
1799 
1800  // Add:
1801  // Overflow -> (Result < LHS) ^ (RHS < 0)
1802  // Sub:
1803  // Overflow -> (Result < LHS) ^ (RHS > 0)
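// Costed accordingly as the add/sub itself, two compares (Result vs LHS
// and RHS vs zero) and the xor that combines them.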
1804  InstructionCost Cost = 0;
1805  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1806  Cost += 2 * thisT()->getCmpSelInstrCost(
1807  Instruction::ICmp, SumTy, OverflowTy,
1808  CmpInst::ICMP_SGT, CostKind);
1809  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
1810  CostKind);
1811  return Cost;
1812  }
1813  case Intrinsic::uadd_with_overflow:
1814  case Intrinsic::usub_with_overflow: {
1815  Type *SumTy = RetTy->getContainedType(0);
1816  Type *OverflowTy = RetTy->getContainedType(1);
1817  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1818  ? BinaryOperator::Add
1819  : BinaryOperator::Sub;
1820  CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
1821  ? CmpInst::ICMP_ULT
1822  : CmpInst::ICMP_UGT;
1823 
1824  InstructionCost Cost = 0;
1825  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1826  Cost +=
1827  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
1828  Pred, CostKind);
1829  return Cost;
1830  }
1831  case Intrinsic::smul_with_overflow:
1832  case Intrinsic::umul_with_overflow: {
1833  Type *MulTy = RetTy->getContainedType(0);
1834  Type *OverflowTy = RetTy->getContainedType(1);
1835  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1836  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1837  bool IsSigned = IID == Intrinsic::smul_with_overflow;
1838 
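// Overflow is detected by comparing the high half of the widened
// product against what the low half implies; for the signed case an
// extra arithmetic shift forms the expected sign-extension.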
1839  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
1840  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1841 
1842  InstructionCost Cost = 0;
1843  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
1844  Cost +=
1845  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1846  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
1847  CCH, CostKind);
1848  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
1849  CostKind, TTI::OK_AnyValue,
1850  TTI::OK_UniformConstantValue);
1851 
1852  if (IsSigned)
1853  Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
1854  CostKind, TTI::OK_AnyValue,
1855  TTI::OK_UniformConstantValue);
1856 
1857  Cost += thisT()->getCmpSelInstrCost(
1858  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
1859  return Cost;
1860  }
1861  case Intrinsic::ctpop:
1862  ISDs.push_back(ISD::CTPOP);
1863  // In case of legalization use TCC_Expensive. This is cheaper than a
1864  // library call but still not a cheap instruction.
1865  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1866  break;
1867  case Intrinsic::ctlz:
1868  ISDs.push_back(ISD::CTLZ);
1869  break;
1870  case Intrinsic::cttz:
1871  ISDs.push_back(ISD::CTTZ);
1872  break;
1873  case Intrinsic::bswap:
1874  ISDs.push_back(ISD::BSWAP);
1875  break;
1876  case Intrinsic::bitreverse:
1877  ISDs.push_back(ISD::BITREVERSE);
1878  break;
1879  }
1880 
1881  const TargetLoweringBase *TLI = getTLI();
1882  std::pair<InstructionCost, MVT> LT =
1883  TLI->getTypeLegalizationCost(DL, RetTy);
1884 
1885  SmallVector<InstructionCost, 2> LegalCost;
1886  SmallVector<InstructionCost, 2> CustomCost;
1887  for (unsigned ISD : ISDs) {
1888  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1889  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1890  TLI->isFAbsFree(LT.second)) {
1891  return 0;
1892  }
1893 
1894  // The operation is legal. Assume it costs 1.
1895  // If the type is split to multiple registers, assume that there is some
1896  // overhead to this.
1897  // TODO: Once we have extract/insert subvector cost we need to use them.
1898  if (LT.first > 1)
1899  LegalCost.push_back(LT.first * 2);
1900  else
1901  LegalCost.push_back(LT.first * 1);
1902  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1903  // If the operation is custom lowered then assume
1904  // that the code is twice as expensive.
1905  CustomCost.push_back(LT.first * 2);
1906  }
1907  }
1908 
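// Prefer the cheapest candidate that legalizes cleanly; only when no
// ISD is legal or promotable do we fall back to the cheapest custom
// lowering recorded above.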
1909  auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1910  if (MinLegalCostI != LegalCost.end())
1911  return *MinLegalCostI;
1912 
1913  auto MinCustomCostI =
1914  std::min_element(CustomCost.begin(), CustomCost.end());
1915  if (MinCustomCostI != CustomCost.end())
1916  return *MinCustomCostI;
1917 
1918  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1919  // point mul followed by an add.
1920  if (IID == Intrinsic::fmuladd)
1921  return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
1922  CostKind) +
1923  thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
1924  CostKind);
1925  if (IID == Intrinsic::experimental_constrained_fmuladd) {
1926  IntrinsicCostAttributes FMulAttrs(
1927  Intrinsic::experimental_constrained_fmul, RetTy, Tys);
1928  IntrinsicCostAttributes FAddAttrs(
1929  Intrinsic::experimental_constrained_fadd, RetTy, Tys);
1930  return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
1931  thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
1932  }
1933 
1934  // Else, assume that we need to scalarize this intrinsic. For math builtins
1935  // this will emit a costly libcall, adding call overhead and spills. Make it
1936  // very expensive.
1937  if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1938  // Scalable vectors cannot be scalarized, so return Invalid.
1939  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
1940  return isa<ScalableVectorType>(Ty);
1941  }))
1942  return InstructionCost::getInvalid();
1943 
1944  InstructionCost ScalarizationCost =
1945  SkipScalarizationCost ? ScalarizationCostPassed
1946  : getScalarizationOverhead(RetVTy, true, false);
1947 
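// One scalar call is needed per lane; the widest vector among the
// return type and the operand types (see the loop below) decides the
// lane count.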
1948  unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
1949  SmallVector<Type *, 4> ScalarTys;
1950  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1951  Type *Ty = Tys[i];
1952  if (Ty->isVectorTy())
1953  Ty = Ty->getScalarType();
1954  ScalarTys.push_back(Ty);
1955  }
1956  IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
1957  InstructionCost ScalarCost =
1958  thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1959  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1960  if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
1961  if (!ICA.skipScalarizationCost())
1962  ScalarizationCost += getScalarizationOverhead(VTy, false, true);
1963  ScalarCalls = std::max(ScalarCalls,
1964  cast<FixedVectorType>(VTy)->getNumElements());
1965  }
1966  }
1967  return ScalarCalls * ScalarCost + ScalarizationCost;
1968  }
1969 
1970  // This is going to be turned into a library call, make it expensive.
1971  return SingleCallCost;
1972  }
1973 
1974  /// Compute a cost of the given call instruction.
1975  ///
1976  /// Compute the cost of calling function F with return type RetTy and
1977  /// argument types Tys. F might be nullptr, in this case the cost of an
1978  /// arbitrary call with the specified signature will be returned.
1979  /// This is used, for instance, when we estimate the cost of calling a
1980  /// vector counterpart of the given function.
1981  /// \param F Called function, might be nullptr.
1982  /// \param RetTy Return value type.
1983  /// \param Tys Argument types.
1984  /// \returns The cost of Call instruction.
1985  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
1986  ArrayRef<Type *> Tys,
1987  TTI::TargetCostKind CostKind) {
1988  return 10;
1989  }
1990 
1991  unsigned getNumberOfParts(Type *Tp) {
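// LT.first approximates the number of registers the legalized type is
// split into; e.g. a 512-bit vector on a target with 128-bit vector
// registers would typically report 4 parts.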
1992  std::pair<InstructionCost, MVT> LT =
1993  getTLI()->getTypeLegalizationCost(DL, Tp);
1994  return *LT.first.getValue();
1995  }
1996 
1997  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
1998  const SCEV *) {
1999  return 0;
2000  }
2001 
2002  /// Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
2003  /// We're assuming that reduction operations are performed in the following way:
2004  ///
2005  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
2006  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
2007  /// \----------------v-------------/ \----------v------------/
2008  /// n/2 elements n/2 elements
2009  /// %red1 = op <n x t> %val, <n x t> %val1
2010  /// After this operation we have a vector %red1 where only the first n/2
2011  /// elements are meaningful, the second n/2 elements are undefined and can be
2012  /// dropped. All other operations are actually working with the vector of
2013  /// length n/2, not n, though the real vector length is still n.
2014  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
2015  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
2016  /// \----------------v-------------/ \----------v------------/
2017  /// n/4 elements 3*n/4 elements
2018  /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of
2019  /// length n/2, the resulting vector has length n/4 etc.
2020  ///
2021  /// The cost model should take into account that the actual length of the
2022  /// vector is reduced on each iteration.
2023  InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty,
2024  TTI::TargetCostKind CostKind) {
2025  Type *ScalarTy = Ty->getElementType();
2026  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2027  if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
2028  ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
2029  NumVecElts >= 2) {
2030  // Or reduction for i1 is represented as:
2031  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2032  // %res = cmp ne iReduxWidth %val, 0
2033  // And reduction for i1 is represented as:
2034  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2035  // %res = cmp eq iReduxWidth %val, -1 (all ones)
2036  Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
2037  return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
2038  TTI::CastContextHint::None, CostKind) +
2039  thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
2040  CmpInst::makeCmpResultType(ValTy),
2041  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2042  }
2043  unsigned NumReduxLevels = Log2_32(NumVecElts);
2044  InstructionCost ArithCost = 0;
2045  InstructionCost ShuffleCost = 0;
2046  std::pair<InstructionCost, MVT> LT =
2047  thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
2048  unsigned LongVectorCount = 0;
2049  unsigned MVTLen =
2050  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
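// While the vector still has more elements than the legalized machine
// type, each halving step is costed as an extract-subvector shuffle
// plus one binary op on the half-width type.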
2051  while (NumVecElts > MVTLen) {
2052  NumVecElts /= 2;
2053  VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2054  ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
2055  NumVecElts, SubTy);
2056  ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
2057  Ty = SubTy;
2058  ++LongVectorCount;
2059  }
2060 
2061  NumReduxLevels -= LongVectorCount;
2062 
2063  // The minimal length of the vector is limited by the real length of vector
2064  // operations performed on the current platform. That's why several final
2065  // reduction operations are performed on the vectors with the same
2066  // architecture-dependent length.
2067 
2068  // By default reductions need one shuffle per reduction level.
2069  ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
2070  TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
2071  ArithCost +=
2072  NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
2073  return ShuffleCost + ArithCost +
2074  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2075  }
2076 
2077  /// Try to calculate the cost of performing strict (in-order) reductions,
2078  /// which involves doing a sequence of floating point additions in lane
2079  /// order, starting with an initial value. For example, consider a scalar
2080  /// initial value 'InitVal' of type float and a vector of type <4 x float>:
2081  ///
2082  /// Vector = <float %v0, float %v1, float %v2, float %v3>
2083  ///
2084  /// %add1 = %InitVal + %v0
2085  /// %add2 = %add1 + %v1
2086  /// %add3 = %add2 + %v2
2087  /// %add4 = %add3 + %v3
2088  ///
2089  /// As a simple estimate we can say the cost of such a reduction is 4 times
2090  /// the cost of a scalar FP addition. We can only estimate the costs for
2091  /// fixed-width vectors here because for scalable vectors we do not know the
2092  /// runtime number of operations.
2093  InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty,
2094  TTI::TargetCostKind CostKind) {
2095  // Targets must implement a default value for the scalable case, since
2096  // we don't know how many lanes the vector has.
2097  if (isa<ScalableVectorType>(Ty))
2098  return InstructionCost::getInvalid();
2099 
2100  auto *VTy = cast<FixedVectorType>(Ty);
2101  InstructionCost ExtractCost =
2102  getScalarizationOverhead(VTy, /*Insert=*/false, /*Extract=*/true);
2103  InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
2104  Opcode, VTy->getElementType(), CostKind);
2105  ArithCost *= VTy->getNumElements();
2106 
2107  return ExtractCost + ArithCost;
2108  }
2109 
2110  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2111  Optional<FastMathFlags> FMF,
2112  TTI::TargetCostKind CostKind) {
2113  if (TTI::requiresOrderedReduction(FMF))
2114  return getOrderedReductionCost(Opcode, Ty, CostKind);
2115  return getTreeReductionCost(Opcode, Ty, CostKind);
2116  }
2117 
2118  /// Try to calculate op costs for min/max reduction operations.
2119  /// \param CondTy Conditional type for the Select instruction.
2120  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
2121  bool IsUnsigned,
2122  TTI::TargetCostKind CostKind) {
2123  Type *ScalarTy = Ty->getElementType();
2124  Type *ScalarCondTy = CondTy->getElementType();
2125  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2126  unsigned NumReduxLevels = Log2_32(NumVecElts);
2127  unsigned CmpOpcode;
2128  if (Ty->isFPOrFPVectorTy()) {
2129  CmpOpcode = Instruction::FCmp;
2130  } else {
2131  assert(Ty->isIntOrIntVectorTy() &&
2132  "expecting floating point or integer type for min/max reduction");
2133  CmpOpcode = Instruction::ICmp;
2134  }
2135  InstructionCost MinMaxCost = 0;
2136  InstructionCost ShuffleCost = 0;
2137  std::pair<InstructionCost, MVT> LT =
2138  thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
2139  unsigned LongVectorCount = 0;
2140  unsigned MVTLen =
2141  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2142  while (NumVecElts > MVTLen) {
2143  NumVecElts /= 2;
2144  auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2145  CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
2146 
2147  ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
2148  NumVecElts, SubTy);
2149  MinMaxCost +=
2150  thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
2151  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2152  thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
2153  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2154  Ty = SubTy;
2155  ++LongVectorCount;
2156  }
2157 
2158  NumReduxLevels -= LongVectorCount;
2159 
2160  // The minimal length of the vector is limited by the real length of vector
2161  // operations performed on the current platform. That's why several final
2162  // reduction operations are performed on the vectors with the same
2163  // architecture-dependent length.
2164  ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
2165  TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
2166  MinMaxCost +=
2167  NumReduxLevels *
2168  (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
2169  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2170  thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
2171  CmpInst::BAD_ICMP_PREDICATE, CostKind));
2172  // The last min/max should be in vector registers and we counted it above.
2173  // So just need a single extractelement.
2174  return ShuffleCost + MinMaxCost +
2175  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2176  }
2177 
2178  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
2179  Type *ResTy, VectorType *Ty,
2180  TTI::TargetCostKind CostKind) {
2181  // Without any native support, this is equivalent to the cost of
2182  // vecreduce.add(ext) or if IsMLA vecreduce.add(mul(ext, ext))
2183  VectorType *ExtTy = VectorType::get(ResTy, Ty);
2184  InstructionCost RedCost = thisT()->getArithmeticReductionCost(
2185  Instruction::Add, ExtTy, None, CostKind);
2186  InstructionCost MulCost = 0;
2187  InstructionCost ExtCost = thisT()->getCastInstrCost(
2188  IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2189  TTI::CastContextHint::None, CostKind);
2190  if (IsMLA) {
2191  MulCost =
2192  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2193  ExtCost *= 2;
2194  }
2195 
2196  return RedCost + MulCost + ExtCost;
2197  }
2198 
2199  InstructionCost getVectorSplitCost() { return 1; }
2200 
2201  /// @}
2202 };
2203 
2204 /// Concrete BasicTTIImpl that can be used if no further customization
2205 /// is needed.
2206 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
2207  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
2208 
2209  friend class BasicTTIImplBase<BasicTTIImpl>;
2210 
2211  const TargetSubtargetInfo *ST;
2212  const TargetLoweringBase *TLI;
2213 
2214  const TargetSubtargetInfo *getST() const { return ST; }
2215  const TargetLoweringBase *getTLI() const { return TLI; }
2216 
2217 public:
2218  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
2219 };
2220 
2221 } // end namespace llvm
2222 
2223 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
llvm::ShuffleVectorInst::isZeroEltSplatMask
static bool isZeroEltSplatMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses all elements with the same value as the first element of exa...
Definition: Instructions.cpp:2210
llvm::MCSubtargetInfo::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: MCSubtargetInfo.cpp:359
llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition: ISDOpcodes.h:884
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::BasicTTIImplBase::getVectorSplitCost
InstructionCost getVectorSplitCost()
Definition: BasicTTIImpl.h:2199
llvm::BasicTTIImplBase::getFPOpCost
InstructionCost getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:473
ValueTypes.h
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:478
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:457
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:862
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:485
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1072
llvm::BasicTTIImplBase::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:708
llvm::TargetTransformInfoImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Definition: TargetTransformInfoImpl.h:154
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1310
llvm::BasicTTIImplBase::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:437
MathExtras.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:453
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition: TargetLowering.h:192
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:238
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:741
llvm::BasicTTIImplBase::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:624
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:197
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:87
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::TargetTransformInfoImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Definition: TargetTransformInfoImpl.h:160
llvm::BasicTTIImplBase::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:483
llvm::BasicTTIImplBase::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:345
llvm::BasicTTIImplBase::isAlwaysUniform
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:266
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1286
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:720
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:946
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:386
llvm::BasicTTIImplBase::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:295
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:898
llvm::BasicTTIImplBase::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: BasicTTIImpl.h:278
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:162
T
llvm::Function
Definition: Function.h:62
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::ISD::PRE_DEC
@ PRE_DEC
Definition: ISDOpcodes.h:1310
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:666
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
llvm::BasicTTIImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1055
llvm::BasicTTIImplBase::isTypeLegal
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:355
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:729
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:308
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:28
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:541
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:210
llvm::BasicTTIImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
Definition: BasicTTIImpl.h:586
ErrorHandling.h
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1031
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:633
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:150
llvm::BasicTTIImplBase::BasicTTIImplBase
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:243
llvm::BasicTTIImplBase::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:252
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:460
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:149
llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:751
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:875
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:178
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::TargetLoweringBase::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetLowering.h:2401
APInt.h
llvm::BasicTTIImplBase::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:469
llvm::BasicTTIImplBase::~BasicTTIImplBase
virtual ~BasicTTIImplBase()=default
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:747
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:481
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetLoweringBase::getTruncStoreAction
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
Definition: TargetLowering.h:1252
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1410
llvm::TargetLoweringBase::isIndexedLoadLegal
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
Definition: TargetLowering.h:1291
llvm::BasicTTIImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:322
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:535
llvm::TargetLoweringBase::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I) const
Definition: TargetLowering.h:2532
llvm::BasicTTIImplBase::improveShuffleKindFromMask
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask) const
Definition: BasicTTIImpl.h:833
llvm::TargetLoweringBase::isSuitableForJumpTable
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
Definition: TargetLoweringBase.cpp:1620
llvm::BasicTTIImplBase::getInstructionLatency
InstructionCost getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:610
llvm::Optional
Definition: APInt.h:33
llvm::BasicTTIImplBase::isIndexedStoreLegal
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:316
llvm::BasicTTIImplBase::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Definition: BasicTTIImpl.h:642
llvm::SmallPtrSet< const BasicBlock *, 4 >
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:886
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::TargetLoweringBase::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: TargetLoweringBase.cpp:1840
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1341
llvm::TargetTransformInfoImplCRTPBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:865
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:515
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::ShuffleVectorInst::isReverseMask
static bool isReverseMask(ArrayRef< int > Mask)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition: Instructions.cpp:2198
llvm::BasicTTIImplBase::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: BasicTTIImpl.h:663
llvm::TargetLoweringBase::LegalizeAction
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
Definition: TargetLowering.h:196
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:161
llvm::TargetLoweringBase::isCheapToSpeculateCtlz
virtual bool isCheapToSpeculateCtlz() const
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition: TargetLowering.h:605
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:870
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:239
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::BasicTTIImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: BasicTTIImpl.h:326
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2128
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:948
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:867
llvm::BasicTTIImplBase::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:264
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:152
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:860
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::LinearPolySize< TypeSize >::isKnownLT
static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:329
TargetTransformInfoImpl.h
llvm::BasicTTIImplBase::getTreeReductionCost
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
Definition: BasicTTIImpl.h:2023
llvm::Triple::isArch64Bit
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1366
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:160
llvm::BasicTTIImplBase::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Definition: BasicTTIImpl.h:286
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1160
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:151
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
MachineValueType.h
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:942
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::ElementCount::isScalar
bool isScalar() const
Counting predicates.
Definition: TypeSize.h:396
llvm::TargetTransformInfoImplBase::getDataLayout
const DataLayout & getDataLayout() const
Definition: TargetTransformInfoImpl.h:48
llvm::BasicTTIImplBase::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Definition: BasicTTIImpl.h:638
llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:419
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
llvm::TargetLoweringBase::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetLowering.h:2408
Instruction.h
llvm::TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Definition: TargetTransformInfoImpl.h:217
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
TargetLowering.h
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:154
llvm::MCSubtargetInfo::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Return the maximum prefetch distance in terms of loop iterations.
Definition: MCSubtargetInfo.cpp:355
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1335
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:868
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:548
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1114
llvm::MCSubtargetInfo::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Return the preferred prefetch distance in terms of instructions.
Definition: MCSubtargetInfo.cpp:351
llvm::BasicTTIImplBase::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:488
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1279
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:668
llvm::TargetTransformInfoImplBase
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:34
llvm::BasicTTIImplBase::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB)
Definition: BasicTTIImpl.h:484
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
llvm::TargetTransformInfoImplBase::getCacheAssociativity
llvm::Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: TargetTransformInfoImpl.h:431
Constants.h
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1108
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:886
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:859
llvm::BasicTTIImplBase::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis()
Definition: BasicTTIImpl.h:262
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::Triple::isOSDarwin
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS or watchOS).
Definition: Triple.h:485
llvm::BasicTTIImplBase::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: BasicTTIImpl.h:654
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1062
llvm::BasicTTIImplBase::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
Definition: BasicTTIImpl.h:1997
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2363
llvm::BitVector::count
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:154
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
llvm::BasicTTIImplBase::getFlatAddressSpace
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:268
InstrTypes.h
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::BasicTTIImplBase::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2110
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:883
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::BasicTTIImplBase::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:1046
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
llvm::BasicTTIImplBase::getCacheSize
virtual Optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:618
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
llvm::TargetLoweringBase::isCheapToSpeculateCttz
virtual bool isCheapToSpeculateCttz() const
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition: TargetLowering.h:600
llvm::BasicTTIImplBase::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:749
llvm::Instruction
Definition: Instruction.h:45
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1289
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::TargetLoweringBase::isLegalAddressingMode
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetLoweringBase.cpp:1923
llvm::TargetMachine::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: TargetMachine.h:320
llvm::BasicTTIImplBase::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent()
Definition: BasicTTIImpl.h:486
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:882
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::TargetTransformInfoImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
Definition: TargetTransformInfoImpl.h:526
llvm::BasicTTIImplBase::getCacheLineSize
virtual unsigned getCacheLineSize() const
Definition: BasicTTIImpl.h:634
BitVector.h
llvm::TargetTransformInfoImplCRTPBase
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:855
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:74
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1310
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:686
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:310
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1060
llvm::PartialUnrollingThreshold
cl::opt< unsigned > PartialUnrollingThreshold
llvm::BasicTTIImplBase::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Definition: BasicTTIImpl.h:372
llvm::BasicTTIImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:890
llvm::None
const NoneType None
Definition: None.h:23
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
Type.h
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:118
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::BasicTTIImplBase::shouldBuildLookupTables
bool shouldBuildLookupTables()
Definition: BasicTTIImpl.h:431
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1307
LoopInfo.h
llvm::TargetTransformInfoImplBase::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Definition: TargetTransformInfoImpl.h:167
llvm::TargetTransformInfoImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfoImpl.h:219
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1310
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:118
llvm::TargetTransformInfoImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: TargetTransformInfoImpl.h:183
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:873
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:861
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:911
BasicBlock.h
llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1063
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:77
llvm::BasicTTIImplBase::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:299
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::divideCeil
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:742
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:866
llvm::TargetMachine::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: TargetMachine.h:307
llvm::BasicTTIImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
Definition: BasicTTIImpl.h:591
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:876
llvm::BasicTTIImplBase::haveFastSqrt
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:462
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1290
llvm::BasicTTIImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: BasicTTIImpl.h:600
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
uint64_t
llvm::Type::getWithNewBitWidth
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
Definition: DerivedTypes.h:727
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:414
llvm::TargetLoweringBase::isLoadExtLegal
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Definition: TargetLowering.h:1238
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:895
llvm::TargetLoweringBase::getLoadExtAction
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Definition: TargetLowering.h:1226
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:881
llvm::TargetLoweringBase::isSuitableForBitTests
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
Definition: TargetLowering.h:1188
llvm::BasicTTIImplBase::isIndexedLoadLegal
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:310
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::TargetLoweringBase::isOperationLegalOrPromote
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
Definition: TargetLowering.h:1128
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::TargetTransformInfoImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Definition: TargetTransformInfoImpl.h:177
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:428
llvm::EVT::getEVT
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:558
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::TargetTransformInfoImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:482
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:886
llvm::BasicTTIImplBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:366
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:515
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
llvm::BasicTTIImplBase
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:77
ArrayRef.h
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:543
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1341
maximum
Some of the NEON intrinsics may be appropriate for more general use, either as target-independent intrinsics or perhaps elsewhere in the ARM backend. Some of them may also be lowered to target-independent SDNodes, and perhaps some new SDNodes could be added.
Definition: README.txt:489
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
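A minimal sketch, assuming Ctx is an LLVMContext; widths with no simple MVT (such as i24) still come back as extended value types:

  #include "llvm/CodeGen/ValueTypes.h"
  EVT I24 = EVT::getIntegerVT(Ctx, 24); // extended type, no MVT equivalent
  EVT I64 = EVT::getIntegerVT(Ctx, 64); // equivalent to MVT::i64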
llvm::TargetLoweringBase::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetLowering.h:1641
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
memcpy
llc ends up issuing two memcpys, or custom lower memcpy (of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time.
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::BasicTTIImplBase::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:667
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:118
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:753
function
Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:138
llvm::BasicTTIImplBase::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1176
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:872
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:780
llvm::BasicTTIImpl::BasicTTIImpl
BasicTTIImpl(const TargetMachine *TM, const Function &F)
Definition: BasicTargetTransformInfo.cpp:32
llvm::ISD::POST_DEC
@ POST_DEC
Definition: ISDOpcodes.h:1310
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:878
llvm::ElementCount::isVector
bool isVector() const
One or more elements.
Definition: TypeSize.h:398
llvm::TargetSubtargetInfo::useAA
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
Definition: TargetSubtargetInfo.cpp:60
llvm::MCSubtargetInfo::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Return the minimum stride necessary to trigger software prefetching.
Definition: MCSubtargetInfo.cpp:363
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::BasicTTIImplBase::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Definition: BasicTTIImpl.h:650
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2118
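A small sketch contrasting the signed and unsigned orderings on 8-bit values (umax appears further down this index; code assumed inside namespace llvm):

  #include "llvm/ADT/APInt.h"
  APInt A(8, 5), B(8, 250);       // B reads as -6 when interpreted as signed
  APInt S = APIntOps::smin(A, B); // == B: signed   -6  <  5
  APInt U = APIntOps::umax(A, B); // == B: unsigned 250 >  5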
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:539
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:912
llvm::ReplayInlineScope::Function
@ Function
llvm::BasicTTIImplBase::emitGetActiveLaneMask
bool emitGetActiveLaneMask()
Definition: BasicTTIImpl.h:582
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1083
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1310
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
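A hedged usage sketch; Ops is a hypothetical range of const Value * values standing in for any container:

  // True if any operand is undef; no explicit begin()/end() needed.
  bool AnyUndef =
      llvm::any_of(Ops, [](const Value *V) { return isa<UndefValue>(V); });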
DataLayout.h
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:672
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:745
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::BasicTTIImplBase::getRegUsageForType
InstructionCost getRegUsageForType(Type *Ty)
Definition: BasicTTIImpl.h:360
llvm::TargetTransformInfoImplCRTPBase::getInstructionLatency
InstructionCost getInstructionLatency(const Instruction *I)
Definition: TargetTransformInfoImpl.h:1152
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2361
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
TargetSubtargetInfo.h
trunc
We have fiadd patterns now, but the following have the same cost and complexity; we need a way to specify that the latter is more profitable. The FP stackifier should handle simple permutations to reduce the number of shuffles.
Definition: README-FPStack.txt:63
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:877
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1760
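A sketch of the usual pattern before a legality query (TLI assumed in scope; a zero result means the opcode has no ISD equivalent):

  int ISD = TLI->InstructionOpcodeToISD(Instruction::Add); // maps to ISD::ADD
  assert(ISD && "Invalid opcode");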
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:223
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfoImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:535
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:878
llvm::BasicTTIImplBase::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:560
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1287
llvm::VectorType::getHalfElementsVectorType
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
Definition: DerivedTypes.h:493
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:421
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:879
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract)
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
Definition: BasicTTIImpl.h:697
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:286
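A minimal usage sketch; the macro marks an intentional fall-through so -Wimplicit-fallthrough stays quiet:

  switch (Opcode) {
  case Instruction::ZExt:
    // Shared handling for both extension kinds.
    LLVM_FALLTHROUGH;
  case Instruction::SExt:
    break;
  }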
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:127
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:865
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1341
llvm::BasicTTIImplBase::useAA
bool useAA() const
Definition: BasicTTIImpl.h:353
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1288
llvm::TargetLoweringBase::isFreeAddrSpaceCast
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
Definition: TargetLoweringBase.cpp:945
llvm::TargetTransformInfoImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
Definition: TargetTransformInfoImpl.h:453
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:868
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:162
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:339
llvm::BasicTTIImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: BasicTTIImpl.h:568
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2133
Constant.h
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2362
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1296
llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition: TargetLowering.h:2725
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:392
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:899
llvm::BasicTTIImpl
Concrete BasicTTIImpl that can be used if no further customization is needed.
Definition: BasicTTIImpl.h:2206
llvm::KnownBits
Definition: KnownBits.h:23
llvm::BasicTTIImplBase::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
Definition: BasicTTIImpl.h:1985
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2364
llvm::TargetLoweringBase::isIndexedStoreLegal
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
Definition: TargetLowering.h:1305
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:495
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::ShuffleVectorInst::isSelectMask
static bool isSelectMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition: Instructions.cpp:2222
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:196
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
ISDOpcodes.h
llvm::TypeSize
Definition: TypeSize.h:417
llvm::MCSchedModel::DefaultLoadLatency
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:286
Casting.h
llvm::BasicTTIImplBase::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1160
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:201
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1213
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::BasicTTIImplBase::isProfitableToHoist
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:349
llvm::BasicTTIImplBase::isLegalAddImmediate
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:291
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
Definition: BasicTTIImpl.h:735
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:221
llvm::Function::isTargetIntrinsic
bool isTargetIntrinsic() const
isTargetIntrinsic - Returns true if this function is an intrinsic and the intrinsic is specific to a ...
Definition: Function.cpp:745
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:842
llvm::TargetLoweringBase::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetLowering.h:2389
llvm::BasicTTIImplBase::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Definition: BasicTTIImpl.h:273
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfoImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: TargetTransformInfoImpl.h:171
llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1142
llvm::TargetTransformInfoImplBase::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:38
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:95
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::BasicTTIImplBase::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
Definition: BasicTTIImpl.h:2120
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: TargetLowering.h:2512
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:685
llvm::BasicTTIImplBase::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1167
Instructions.h
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:148
llvm::TargetLoweringBase::areJTsAllowed
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
Definition: TargetLowering.h:1160
SmallVector.h
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:907
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:199
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:743
llvm::BasicTTIImplBase::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)
Definition: BasicTTIImpl.h:866
N
#define N
llvm::BasicTTIImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1320
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:670
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:667
llvm::BasicTTIImplBase::getNumberOfParts
unsigned getNumberOfParts(Type *Tp)
Definition: BasicTTIImpl.h:1991
TargetTransformInfo.h
llvm::BasicTTIImplBase::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1116
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2360
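A hedged sketch of describing reg + 2*reg + 4 and asking the target about it (DL, Ty, and AS are assumed to be in scope):

  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = nullptr;   // no global base
  AM.BaseOffs = 4;       // constant displacement
  AM.HasBaseReg = true;  // base register present
  AM.Scale = 2;          // scaled index register
  bool Legal = TLI->isLegalAddressingMode(DL, AM, Ty, AS);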
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2601
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1291
llvm::SmallVectorImpl< int >
llvm::BasicTTIImplBase::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:282
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1161
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1133
DerivedTypes.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:313
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:450
llvm::BasicTTIImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)
Definition: BasicTTIImpl.h:575
llvm::BasicTTIImplBase::getTypeBasedIntrinsicInstrCost
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:1501
BB
Definition: README.txt:39
llvm::BasicTTIImplBase::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:334
llvm::MCSubtargetInfo::getCacheLineSize
virtual Optional< unsigned > getCacheLineSize(unsigned Level) const
Return the target cache line size in bytes at a given level.
Definition: MCSubtargetInfo.cpp:347
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:665
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3212
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1409
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:156
llvm::MCSubtargetInfo::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(unsigned Level) const
Return the cache associativity for the given level of cache.
Definition: MCSubtargetInfo.cpp:343
llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition: TargetLowering.h:213
llvm::TargetTransformInfoImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:575
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:882
llvm::BasicTTIImplBase::getOrderedReductionCost
InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate the cost of performing strict (in-order) reductions, which involves doing a sequence...
Definition: BasicTTIImpl.h:2093
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1282
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:483
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:133
llvm::BasicTTIImplBase::hasBranchDivergence
bool hasBranchDivergence()
Definition: BasicTTIImpl.h:260
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:874
llvm::BasicTTIImplBase::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2178
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:337
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::BasicTTIImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I)
Definition: BasicTTIImpl.h:330
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:153
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:670
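A minimal sketch contrasting fixed and scalable element counts (Ctx assumed to be an LLVMContext):

  VectorType *V4F32 =
      VectorType::get(Type::getFloatTy(Ctx), ElementCount::getFixed(4));    // <4 x float>
  VectorType *NxI32 =
      VectorType::get(Type::getInt32Ty(Ctx), ElementCount::getScalable(4)); // <vscale x 4 x i32>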
llvm::ShuffleVectorInst::isTransposeMask
static bool isTransposeMask(ArrayRef< int > Mask)
Return true if this shuffle mask is a transpose mask.
Definition: Instructions.cpp:2235
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:867
llvm::Triple::aarch64
@ aarch64
Definition: Triple.h:52
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2123
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition: TargetLowering.h:929
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
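A sketch of the common visited-set idiom; insert(...).second is true only the first time a pointer is seen (Worklist is a hypothetical range):

  SmallPtrSet<const Value *, 8> Visited;
  for (const Value *V : Worklist)
    if (!Visited.insert(V).second)
      continue; // already visited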
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:688