//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>

namespace llvm {

class Function;
class GlobalValue;
class LLVMContext;
class ScalarEvolution;
class SCEV;
class TargetMachine;

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
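///
/// A minimal sketch of a conforming subclass (illustrative only; MyTTIImpl is
/// a hypothetical name, and real targets also thread the TargetMachine and
/// DataLayout through their constructors):
///
///   class MyTTIImpl : public BasicTTIImplBase<MyTTIImpl> {
///     const TargetSubtargetInfo *ST;
///     const TargetLoweringBase *TLI;
///
///   public:
///     const TargetSubtargetInfo *getST() const { return ST; }
///     const TargetLoweringBase *getTLI() const { return TLI; }
///   };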
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
  using TTI = TargetTransformInfo;

  /// Helper function to access this as a T.
  T *thisT() { return static_cast<T *>(this); }

  /// Estimate a cost of Broadcast as an extract and sequence of insert
  /// operations.
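  ///
  /// For example, under this model broadcasting a <4 x float> costs one
  /// extract (of element zero) plus four inserts, one per result element.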
  unsigned getBroadcastShuffleOverhead(FixedVectorType *VTy) {
    unsigned Cost = 0;
    // Broadcast cost is equal to the cost of extracting the zeroth element
    // plus the cost of inserting it into every element of the result vector.
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);

    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of shuffle as a sequence of extract and insert
  /// operations.
  unsigned getPermuteShuffleOverhead(FixedVectorType *VTy) {
    unsigned Cost = 0;
    // Shuffle cost is equal to the cost of extracting each element from its
    // argument plus the cost of inserting them onto the result vector.

    // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
    // index 0 of the first vector, index 1 of the second vector, index 2 of
    // the first vector, and finally index 3 of the second vector, and insert
    // them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector extraction as a sequence of extract and
  /// insert operations.
  unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
                                       FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only extract subvectors from vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_ExtractSubvector index out of range");

    unsigned Cost = 0;
    // Subvector extraction cost is equal to the cost of extracting the
    // elements from the source type plus the cost of inserting them into the
    // result vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                          i + Index);
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector insertion as a sequence of extract and
  /// insert operations.
  unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
                                      FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only insert subvectors into vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_InsertSubvector index out of range");

    unsigned Cost = 0;
    // Subvector insertion cost is equal to the cost of extracting the
    // elements from the subvector type plus the cost of inserting them into
    // the result vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                          i + Index);
    }
    return Cost;
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
  virtual ~BasicTTIImplBase() = default;

  using TargetTransformInfoImplBase::DL;

public:
  /// \name Scalar TTI Implementations
  /// @{
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }

  bool hasBranchDivergence() { return false; }

  bool useGPUDivergenceAnalysis() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  bool isNumRegsMajorCostOfLSR() {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }

  bool isProfitableLSRChainElement(Instruction *I) {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  unsigned getRegUsageForType(Type *Ty) {
    return getTLI()->getTypeLegalizationCost(DL, Ty).first;
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// numbers found in lowering. This function ignores switches that are
    /// lowered with a mix of jump table / bit test / BTree. This function was
    /// initially intended to be used when estimating the cost of a switch in
    /// the inline cost heuristic, but it's a generic cost model to be used in
    /// other places (e.g., in loop unrolling).
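    ///
    /// For example, a switch with 16 contiguous cases that is dense enough
    /// for a jump table is reported as a single cluster, with JumpTableSize
    /// set to the covered range (16).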
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if both a jump table and bit test are not allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test.
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether a range of clusters is dense enough for a jump table.
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool shouldBuildRelLookupTables() const {
    const TargetMachine &TM = getTLI()->getTargetMachine();
    // If non-PIC mode, do not generate a relative lookup table.
    if (!TM.isPositionIndependent())
      return false;

    /// Relative lookup table entries consist of 32-bit offsets.
    /// Do not generate relative lookup tables for large code models
    /// in 64-bit architectures where 32-bit offsets might not be enough.
    if (TM.getCodeModel() == CodeModel::Medium ||
        TM.getCodeModel() == CodeModel::Large)
      return false;

    Triple TargetTriple = TM.getTargetTriple();
    if (!TargetTriple.isArch64Bit())
      return false;

    // TODO: Triggers an issue in aarch64, so temporarily disable it.
    // See https://reviews.llvm.org/D99572 for more information.
    if (TargetTriple.getArch() == Triple::aarch64)
      return false;

    return true;
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }

  unsigned getFPOpCost(Type *Ty) {
    // Check whether FADD is available, as a proxy for floating-point in
    // general.
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
      return TargetTransformInfo::TCC_Basic;
    return TargetTransformInfo::TCC_Expensive;
  }

  unsigned getInliningThresholdMultiplier() { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }

  int getInlinerVectorBonusPercent() { return 150; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.
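    //
    // For example, on a subtarget whose scheduling model reports
    // LoopMicroOpBufferSize = 28, a call-free loop gets partial and runtime
    // unrolling enabled below with UP.PartialThreshold = 28.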

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (BasicBlock *BB : L->blocks()) {
      for (Instruction &I : *BB) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
          if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
            if (!thisT()->isLoweredToCall(F))
              continue;
          }

          return;
        }
      }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using the trip count upper bound to unroll loops.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set number of instructions optimized when "back edge"
    // becomes "fall through" to default value of 2.
    UP.BEInsns = 2;
  }


  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) {
    PP.PeelCount = 0;
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) {
    return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }

  bool emitGetActiveLaneMask() {
    return BaseT::emitGetActiveLaneMask();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

  Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedMask,
                                                     KnownBits &Known,
                                                     bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) {
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }

  InstructionCost getInstructionLatency(const Instruction *I) {
    if (isa<LoadInst>(I))
      return getST()->getSchedModel().DefaultLoadLatency;

    return BaseT::getInstructionLatency(I);
  }

  virtual Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return Optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    Optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
      return TargetResult;

    return BaseT::getCacheAssociativity(Level);
  }

  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }

  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }

  virtual bool enableWritePrefetching() const {
    return getST()->enableWritePrefetching();
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  Optional<unsigned> getMaxVScale() const { return None; }

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
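  ///
  /// For example, with DemandedElts = 0b0101 for a <4 x i32>, only elements
  /// 0 and 2 contribute insert and/or extract costs.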
  unsigned getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts,
                                    bool Insert, bool Extract) {
    /// FIXME: a bitfield is not a reasonable abstraction for talking about
    /// which elements are needed from a scalable vector
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           "Vector size mismatch");

    unsigned Cost = 0;

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }

  /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
  unsigned getScalarizationOverhead(VectorType *InTy, bool Insert,
                                    bool Extract) {
    auto *Ty = cast<FixedVectorType>(InTy);

    APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  }

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            ArrayRef<Type *> Tys) {
    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");

    unsigned Cost = 0;
    SmallPtrSet<const Value*, 4> UniqueOperands;
    for (int I = 0, E = Args.size(); I != E; I++) {
      // Disregard things like metadata arguments.
      const Value *A = Args[I];
      Type *Ty = Tys[I];
      if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
          !Ty->isPtrOrPtrVectorTy())
        continue;

      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
          Cost += getScalarizationOverhead(VecTy, false, true);
      }
    }

    return Cost;
  }

  /// Estimate the overhead of scalarizing the inputs and outputs of an
  /// instruction, with return type RetTy and arguments Args of type Tys. If
  /// Args are unknown (empty), then the cost associated with one argument is
  /// added as a heuristic.
  unsigned getScalarizationOverhead(VectorType *RetTy,
                                    ArrayRef<const Value *> Args,
                                    ArrayRef<Type *> Tys) {
    unsigned Cost = 0;

    Cost += getScalarizationOverhead(RetTy, true, false);
    if (!Args.empty())
      Cost += getOperandsScalarizationOverhead(Args, Tys);
    else
      // When no information on arguments is provided, we add the cost
      // associated with one argument as a heuristic.
      Cost += getScalarizationOverhead(RetTy, false, true);

    return Cost;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle more cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                           Opd1Info, Opd2Info,
                                           Opd1PropInfo, Opd2PropInfo,
                                           Args, CxtI);

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    unsigned OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is twice
      // as expensive.
      return LT.first * 2 * OpCost;
    }

    // Else, assume that we need to scalarize this op.
    // TODO: If one of the types gets legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
      unsigned Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      SmallVector<Type *> Tys(Args.size(), Ty);
      return getScalarizationOverhead(VTy, Args, Tys) + Num * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp) {

    switch (Kind) {
    case TTI::SK_Broadcast:
      return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc:
      return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, Index,
                                        cast<FixedVectorType>(SubTp));
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr) {
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::BitCast:
      // Bitcasts between types that are legalized to the same type are free,
      // and assume int to/from ptr of the same size is also free.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;

      // If this is a zext/sext of a load, return 0 if the corresponding
      // extending load exists on target and the result type is legal.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
        if (DstLT.first == SrcLT.first &&
            TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
          return 0;
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are legalizing by splitting, query the concrete TTI for the
      // cost of casting the original vector twice. We also need to factor in
      // the cost of the split itself. Count that as 1, to be consistent with
      // TLI->getTypeLegalizationCost().
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcast between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
    }

    llvm_unreachable("Unhandled cast");
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index) {
    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       Index) +
           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                     TTI::CastContextHint::None,
                                     TTI::TCK_RecipThroughput);
  }

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                          const Instruction *I = nullptr) {
    return BaseT::getCFInstrCost(Opcode, CostKind, I);
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle other cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       I);

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the cast is scalarized.
    // TODO: If one of the types gets legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      InstructionCost Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    std::pair<unsigned, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());

    return LT.first;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) {
    assert(!Src->isVoidTy() && "Invalid type");
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);

    // Assuming that all loads of legal types cost 1.
    InstructionCost Cost = LT.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    if (Src->isVectorTy() &&
        // In practice it's not currently possible to have a change in lane
        // length for extending loads or truncating stores so both types should
        // have the same scalable property.
        TypeSize::isKnownLT(DL.getTypeStoreSizeInBits(Src),
                            LT.second.getSizeInBits())) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, then this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src);
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(cast<VectorType>(Src),
                                         Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) {
    auto *VT = cast<FixedVectorType>(DataTy);
    // Assume the target does not have support for gather/scatter operations
    // and provide a rough estimate.
    //
    // First, compute the cost of extracting the individual addresses and the
    // individual memory operations.
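    //
    // For example, a gather of <4 x float> is modeled here as four pointer
    // extracts plus four scalar loads; the packing of the result and any
    // per-lane mask handling are added separately below.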
    InstructionCost LoadCost =
        VT->getNumElements() *
        (getVectorInstrCost(
             Instruction::ExtractElement,
             FixedVectorType::get(PointerType::get(VT->getElementType(), 0),
                                  VT->getNumElements()),
             -1) +
         getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));

    // Next, compute the cost of packing the result in a vector.
    int PackingCost = getScalarizationOverhead(VT, Opcode != Instruction::Store,
                                               Opcode == Instruction::Store);

    int ConditionalCost = 0;
    if (VariableMask) {
      // Compute the cost of conditionally executing the memory operations with
      // variable masks. This includes extracting the individual conditions,
      // the branches, and the PHIs needed to combine the results.
      // NOTE: Estimating the cost of conditionally executing the memory
      // operations accurately is quite difficult and the current solution
      // provides a very rough estimate only.
      ConditionalCost =
          VT->getNumElements() *
          (getVectorInstrCost(
               Instruction::ExtractElement,
               FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
                                    VT->getNumElements()),
               -1) +
           getCFInstrCost(Instruction::Br, CostKind) +
           getCFInstrCost(Instruction::PHI, CostKind));
    }

    return LoadCost + PackingCost + ConditionalCost;
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {
    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);

    // Firstly, the cost of the load/store operation.
    InstructionCost Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalize the vector type, and get the legalized and unlegalized type
    // sizes.
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

    // Return the ceiling of dividing A by B.
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };

    // Scale the cost of the memory operation by the fraction of legalized
    // instructions that will actually be used. We shouldn't account for the
    // cost of dead instructions since they will be removed.
    //
    // E.g., an interleaved load of factor 8:
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
    //       %v0 = shufflevector %vec, undef, <0, 8>
    //
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
    // type). The other loads are unused.
    //
    // We only scale the cost of loads since interleaved store groups aren't
    // allowed to have gaps.
    if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
      // The number of loads of a legal type it will take to represent a load
      // of the unlegalized vector type.
      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);

      // The number of elements of the unlegalized type that correspond to a
      // single legal instruction.
      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);

      // Determine which legal instructions will be used.
      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

      // Scale the cost of the load by the fraction of legal instructions that
      // will be used. Multiply before dividing so the integer division cannot
      // round the scaled cost down to zero.
      Cost *= UsedInsts.count();
      Cost /= NumLegalInsts;
    }

    // Then add the cost of the interleave operation.
    if (Opcode == Instruction::Load) {
      // The interleave cost is similar to extracting sub vectors' elements
      // from the wide vector, and inserting them into sub vectors.
      //
      // E.g. An interleaved load of factor 2 (with one member of index 0):
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>   ; Index 0
      // The cost is estimated as extracting elements at 0, 2, 4, 6 from the
      // <8 x i32> vector and inserting them into a <4 x i32> vector.

      assert(Indices.size() <= Factor &&
             "Interleaved memory op has too many members");

      for (unsigned Index : Indices) {
        assert(Index < Factor && "Invalid index for interleaved memory op");

        // Extract elements from the loaded vector for each sub vector.
        for (unsigned i = 0; i < NumSubElts; i++)
          Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
                                              Index + i * Factor);
      }

      unsigned InsSubCost = 0;
      for (unsigned i = 0; i < NumSubElts; i++)
        InsSubCost +=
            thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);

      Cost += Indices.size() * InsSubCost;
    } else {
      // The interleave cost is to extract all elements from the sub vectors,
      // and insert them into the wide vector.
      //
      // E.g. An interleaved store of factor 2:
      //      %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
      // The cost is estimated as extracting all elements from both <4 x i32>
      // vectors and inserting them into the <8 x i32> vector.

      unsigned ExtSubCost = 0;
      for (unsigned i = 0; i < NumSubElts; i++)
        ExtSubCost +=
            thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
      Cost += ExtSubCost * Factor;

      for (unsigned i = 0; i < NumElts; i++)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::InsertElement, VT, i);
    }

    if (!UseMaskForCond)
      return Cost;

    Type *I8Type = Type::getInt8Ty(VT->getContext());
    auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
    SubVT = FixedVectorType::get(I8Type, NumSubElts);

    // The Mask shuffling cost is extracting all the elements of the Mask
    // and inserting each of them Factor times into the wide vector:
    //
    // E.g. an interleaved group with factor 3:
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
    // The cost is estimated as extracting all mask elements from the <8 x i1>
    // mask vector and inserting them factor times into the <24 x i1> shuffled
    // mask vector.
    for (unsigned i = 0; i < NumSubElts; i++)
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);

    for (unsigned i = 0; i < NumElts; i++)
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);

    // The Gaps mask is invariant and created outside the loop, therefore the
    // cost of creating it is not accounted for here. However, if we have both
    // a MaskForGaps and some other mask that guards the execution of the
    // memory access, we need to account for the cost of And-ing the two masks
    // inside the loop.
    if (UseMaskForGaps)
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                              CostKind);

    return Cost;
  }

  /// Get intrinsic cost based on arguments.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) {
    // Check for generically free intrinsics.
    if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
      return 0;

    // Assume that target intrinsics are cheap.
    Intrinsic::ID IID = ICA.getID();
    if (Function::isTargetIntrinsic(IID))
      return TargetTransformInfo::TCC_Basic;

    if (ICA.isTypeBasedOnly())
      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

    Type *RetTy = ICA.getReturnType();

    ElementCount RetVF =
        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                             : ElementCount::getFixed(1));
    const IntrinsicInst *I = ICA.getInst();
    const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
    FastMathFlags FMF = ICA.getFlags();
    switch (IID) {
    default:
      break;

    case Intrinsic::cttz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::ctlz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());

    case Intrinsic::masked_scatter: {
      const Value *Mask = Args[3];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Store,
                                             ICA.getArgTypes()[0], Args[1],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_stepvector: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      // The cost of materialising a constant integer vector.
      return TargetTransformInfo::TCC_Basic;
    }
    case Intrinsic::experimental_vector_extract: {
      // FIXME: Handle case where a scalable vector is extracted from a
      // scalable vector
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::experimental_vector_insert: {
      // FIXME: Handle case where a scalable vector is inserted into a
      // scalable vector
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), None,
          Index, cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::experimental_vector_reverse: {
      return thisT()->getShuffleCost(TTI::SK_Reverse,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     0, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
                                                              : TTI::OP_None;
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
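      //
      // For example, under this model fshl on i32 costs one Or, one Sub, one
      // Shl, and one LShr, plus a URem when Z is not a constant, plus an ICmp
      // and a Select to handle shift-by-zero when X != Y (a non-rotate).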
      InstructionCost Cost = 0;
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
      // Non-constant shift amounts require a modulo.
      if (OpKindZ != TTI::OK_UniformConstantValue &&
          OpKindZ != TTI::OK_NonUniformConstantValue)
        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
                                                CostKind, OpKindZ, OpKindBW,
                                                OpPropsZ, OpPropsBW);
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
      if (X != Y) {
        Type *CondTy = RetTy->getWithNewBitWidth(1);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                        CmpInst::BAD_ICMP_PREDICATE, CostKind);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                        CmpInst::BAD_ICMP_PREDICATE, CostKind);
      }
      return Cost;
    }
    }

    // Assume that we need to scalarize this intrinsic.
    // Compute the scalarization overhead based on Args for a vector
    // intrinsic.
    unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
    if (RetVF.isVector() && !RetVF.isScalable()) {
      ScalarizationCost = 0;
      if (!RetTy->isVoidTy())
        ScalarizationCost +=
            getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
      ScalarizationCost +=
          getOperandsScalarizationOverhead(Args, ICA.getArgTypes());
    }

    IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
                                  ScalarizationCost);
    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
  }

  /// Get intrinsic cost based on argument types.
  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
  /// cost of scalarizing the arguments and the return value will be computed
  /// based on types.
  InstructionCost
  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) {
    Intrinsic::ID IID = ICA.getID();
    Type *RetTy = ICA.getReturnType();
    const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
    FastMathFlags FMF = ICA.getFlags();
    unsigned ScalarizationCostPassed = ICA.getScalarizationCost();
    bool SkipScalarizationCost = ICA.skipScalarizationCost();

    VectorType *VecOpTy = nullptr;
    if (!Tys.empty()) {
      // The vector reduction operand is operand 0 except for fadd/fmul.
      // Their operand 0 is a scalar start value, so the vector op is operand 1.
      unsigned VecTyIndex = 0;
      if (IID == Intrinsic::vector_reduce_fadd ||
          IID == Intrinsic::vector_reduce_fmul)
        VecTyIndex = 1;
      assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
      VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
    }

    // Library call cost - other than size, make it expensive.
    unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
    SmallVector<unsigned, 2> ISDs;
    switch (IID) {
    default: {
      // Scalable vectors cannot be scalarized, so return Invalid.
      if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
            return isa<ScalableVectorType>(Ty);
          }))
        return InstructionCost::getInvalid();

      // Assume that we need to scalarize this intrinsic.
      InstructionCost ScalarizationCost = ScalarizationCostPassed;
      unsigned ScalarCalls = 1;
      Type *ScalarRetTy = RetTy;
      if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
        if (!SkipScalarizationCost)
          ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
        ScalarCalls = std::max(ScalarCalls,
                               cast<FixedVectorType>(RetVTy)->getNumElements());
        ScalarRetTy = RetTy->getScalarType();
      }
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (auto *VTy = dyn_cast<VectorType>(Ty)) {
          if (!SkipScalarizationCost)
            ScalarizationCost += getScalarizationOverhead(VTy, false, true);
          ScalarCalls = std::max(ScalarCalls,
                                 cast<FixedVectorType>(VTy)->getNumElements());
          Ty = Ty->getScalarType();
        }
        ScalarTys.push_back(Ty);
      }
      if (ScalarCalls == 1)
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.

      IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
      InstructionCost ScalarCost =
          thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);

      return ScalarCalls * ScalarCost + ScalarizationCost;
    }
    // Look for intrinsics that can be lowered directly or turned into a scalar
    // intrinsic call.
    case Intrinsic::sqrt:
      ISDs.push_back(ISD::FSQRT);
      break;
    case Intrinsic::sin:
      ISDs.push_back(ISD::FSIN);
      break;
    case Intrinsic::cos:
      ISDs.push_back(ISD::FCOS);
      break;
    case Intrinsic::exp:
      ISDs.push_back(ISD::FEXP);
      break;
    case Intrinsic::exp2:
      ISDs.push_back(ISD::FEXP2);
      break;
    case Intrinsic::log:
      ISDs.push_back(ISD::FLOG);
      break;
    case Intrinsic::log10:
      ISDs.push_back(ISD::FLOG10);
      break;
    case Intrinsic::log2:
      ISDs.push_back(ISD::FLOG2);
      break;
    case Intrinsic::fabs:
      ISDs.push_back(ISD::FABS);
      break;
    case Intrinsic::canonicalize:
      ISDs.push_back(ISD::FCANONICALIZE);
      break;
    case Intrinsic::minnum:
      ISDs.push_back(ISD::FMINNUM);
      break;
    case Intrinsic::maxnum:
      ISDs.push_back(ISD::FMAXNUM);
      break;
    case Intrinsic::minimum:
      ISDs.push_back(ISD::FMINIMUM);
      break;
    case Intrinsic::maximum:
      ISDs.push_back(ISD::FMAXIMUM);
      break;
    case Intrinsic::copysign:
      ISDs.push_back(ISD::FCOPYSIGN);
      break;
    case Intrinsic::floor:
      ISDs.push_back(ISD::FFLOOR);
      break;
    case Intrinsic::ceil:
      ISDs.push_back(ISD::FCEIL);
      break;
    case Intrinsic::trunc:
      ISDs.push_back(ISD::FTRUNC);
      break;
    case Intrinsic::nearbyint:
      ISDs.push_back(ISD::FNEARBYINT);
      break;
    case Intrinsic::rint:
      ISDs.push_back(ISD::FRINT);
      break;
    case Intrinsic::round:
      ISDs.push_back(ISD::FROUND);
      break;
    case Intrinsic::roundeven:
      ISDs.push_back(ISD::FROUNDEVEN);
      break;
    case Intrinsic::pow:
      ISDs.push_back(ISD::FPOW);
      break;
    case Intrinsic::fma:
      ISDs.push_back(ISD::FMA);
      break;
    case Intrinsic::fmuladd:
      ISDs.push_back(ISD::FMA);
      break;
    case Intrinsic::experimental_constrained_fmuladd:
      ISDs.push_back(ISD::STRICT_FMA);
      break;
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
      return 0;
    case Intrinsic::masked_store: {
      Type *Ty = Tys[0];
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::masked_load: {
      Type *Ty = RetTy;
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::vector_reduce_add:
      return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
                                                 /*IsPairwiseForm=*/false,
                                                 CostKind);
    case Intrinsic::vector_reduce_mul:
      return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
                                                 /*IsPairwiseForm=*/false,
                                                 CostKind);
    case Intrinsic::vector_reduce_and:
      return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
                                                 /*IsPairwiseForm=*/false,
                                                 CostKind);
    case Intrinsic::vector_reduce_or:
      return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
                                                 /*IsPairwiseForm=*/false,
                                                 CostKind);
    case Intrinsic::vector_reduce_xor:
      return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
                                                 /*IsPairwiseForm=*/false,
                                                 CostKind);
    case Intrinsic::vector_reduce_fadd:
      // FIXME: Add new flag for cost of strict reductions.
      return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
                                                 /*IsPairwiseForm=*/false,
                                                 CostKind);
    case Intrinsic::vector_reduce_fmul:
      // FIXME: Add new flag for cost of strict reductions.
      return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
                                                 /*IsPairwiseForm=*/false,
                                                 CostKind);
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
      return thisT()->getMinMaxReductionCost(
          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
          /*IsPairwiseForm=*/false,
          /*IsUnsigned=*/false, CostKind);
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin:
      return thisT()->getMinMaxReductionCost(
          VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
          /*IsPairwiseForm=*/false,
          /*IsUnsigned=*/true, CostKind);
    case Intrinsic::abs:
    case Intrinsic::smax:
    case Intrinsic::smin:
    case Intrinsic::umax:
    case Intrinsic::umin: {
      // abs(X) = select(icmp(X,0),X,sub(0,X))
      // minmax(X,Y) = select(icmp(X,Y),X,Y)
      Type *CondTy = RetTy->getWithNewBitWidth(1);
      InstructionCost Cost = 0;
      // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
      Cost +=
          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
      Cost +=
          thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
      // TODO: Should we add an OperandValueProperties::OP_Zero property?
      if (IID == Intrinsic::abs)
        Cost += thisT()->getArithmeticInstrCost(
            BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
      return Cost;
    }
    case Intrinsic::sadd_sat:
    case Intrinsic::ssub_sat: {
      Type *CondTy = RetTy->getWithNewBitWidth(1);

      Type *OpTy = StructType::create({RetTy, CondTy});
      Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
                                     ? Intrinsic::sadd_with_overflow
                                     : Intrinsic::ssub_with_overflow;

      // SatMax -> Overflow && SumDiff < 0
      // SatMin -> Overflow && SumDiff >= 0
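      //
      // That is, the expansion computes the overflow intrinsic and then picks
      // the saturated or raw result with selects keyed on the overflow flag
      // and the sign of the raw sum/difference; the costs below mirror that
      // (one compare plus two selects on top of the overflow intrinsic).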
      InstructionCost Cost = 0;
      IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
                                    nullptr, ScalarizationCostPassed);
      Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
      Cost +=
          thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
      Cost += 2 * thisT()->getCmpSelInstrCost(
                      BinaryOperator::Select, RetTy, CondTy,
                      CmpInst::BAD_ICMP_PREDICATE, CostKind);
      return Cost;
    }
1652  case Intrinsic::uadd_sat:
1653  case Intrinsic::usub_sat: {
1654  Type *CondTy = RetTy->getWithNewBitWidth(1);
1655 
1656  Type *OpTy = StructType::create({RetTy, CondTy});
1657  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1658  ? Intrinsic::uadd_with_overflow
1659  : Intrinsic::usub_with_overflow;
1660 
1661  InstructionCost Cost = 0;
1662  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1663  nullptr, ScalarizationCostPassed);
1664  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1665  Cost +=
1666  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1667  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1668  return Cost;
1669  }
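// Illustrative expansion (editorial): uadd.sat(X, Y) becomes
//   {Sum, Ov} = uadd.with.overflow(X, Y); Res = select(Ov, UNSIGNED_MAX, Sum)
// (usub.sat selects 0 instead), so only the overflow intrinsic plus a single
// select are costed here.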
1670  case Intrinsic::smul_fix:
1671  case Intrinsic::umul_fix: {
1672  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1673  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1674 
1675  unsigned ExtOp =
1676  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1677  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1678 
1679  InstructionCost Cost = 0;
1680  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
1681  Cost +=
1682  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1683  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1684  CCH, CostKind);
1685  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1686  CostKind, TTI::OK_AnyValue,
1687  TTI::OK_UniformConstantValue);
1688  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1689  TTI::OK_AnyValue,
1690  TTI::OK_UniformConstantValue);
1691  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
1692  return Cost;
1693  }
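// Worked example (editorial, scalar i32 smul.fix): the model above sums
//   2 x sext i32->i64 + 1 x mul i64 + 2 x trunc i64->i32 (low/high halves)
//   + lshr + shl (roughly, to stitch the halves around the scale) + or,
// i.e. eight target-costed operations before any legalization effects.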
1694  case Intrinsic::sadd_with_overflow:
1695  case Intrinsic::ssub_with_overflow: {
1696  Type *SumTy = RetTy->getContainedType(0);
1697  Type *OverflowTy = RetTy->getContainedType(1);
1698  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1699  ? BinaryOperator::Add
1700  : BinaryOperator::Sub;
1701 
1702  // LHSSign -> LHS >= 0
1703  // RHSSign -> RHS >= 0
1704  // SumSign -> Sum >= 0
1705  //
1706  // Add:
1707  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1708  // Sub:
1709  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1710  InstructionCost Cost = 0;
1711  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1712  Cost += 3 * thisT()->getCmpSelInstrCost(
1713  Instruction::ICmp, SumTy, OverflowTy,
1714  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1715  Cost += 2 * thisT()->getCmpSelInstrCost(
1716  Instruction::Select, OverflowTy, OverflowTy,
1717  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1718  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
1719  CostKind);
1720  return Cost;
1721  }
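// Worked example (editorial, unit costs): the sign-comparison scheme above
// prices sadd.with.overflow as
//   1 (add) + 3 (icmps for LHSSign/RHSSign/SumSign) + 2 (selects) + 1 (and) = 7.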
1722  case Intrinsic::uadd_with_overflow:
1723  case Intrinsic::usub_with_overflow: {
1724  Type *SumTy = RetTy->getContainedType(0);
1725  Type *OverflowTy = RetTy->getContainedType(1);
1726  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1727  ? BinaryOperator::Add
1728  : BinaryOperator::Sub;
1729 
1730  InstructionCost Cost = 0;
1731  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1732  Cost +=
1733  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
1734  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1735  return Cost;
1736  }
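// Illustrative note (editorial): unsigned overflow needs no sign analysis;
// e.g. for uadd.with.overflow, Ov = (Sum < LHS), and for usub.with.overflow,
// Ov = (LHS < RHS), hence a single add/sub plus one icmp above.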
1737  case Intrinsic::smul_with_overflow:
1738  case Intrinsic::umul_with_overflow: {
1739  Type *MulTy = RetTy->getContainedType(0);
1740  Type *OverflowTy = RetTy->getContainedType(1);
1741  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1742  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1743 
1744  unsigned ExtOp =
1745  IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
1746  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1747 
1748  InstructionCost Cost = 0;
1749  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
1750  Cost +=
1751  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1752  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
1753  CCH, CostKind);
1754  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
1755  CostKind, TTI::OK_AnyValue,
1756  TTI::OK_UniformConstantValue);
1757 
1758  if (IID == Intrinsic::smul_with_overflow)
1759  Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
1760  CostKind, TTI::OK_AnyValue,
1761  TTI::OK_UniformConstantValue);
1762 
1763  Cost +=
1764  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
1765  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1766  return Cost;
1767  }
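// Illustrative expansion (editorial): overflow is detected by recomputing
// the high half of the widened multiply, roughly
//   Wide = ext(X) * ext(Y); Lo = trunc(Wide); Hi = trunc(Wide >> N)
//   unsigned: Ov = icmp ne Hi, 0;  signed: Ov = icmp ne Hi, ashr(Lo, N - 1)
// which is why the signed case pays for one extra ashr above.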
1768  case Intrinsic::ctpop:
1769  ISDs.push_back(ISD::CTPOP);
1770  // In case of legalization use TCC_Expensive. This is cheaper than a
1771  // library call but still not a cheap instruction.
1772  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1773  break;
1774  case Intrinsic::ctlz:
1775  ISDs.push_back(ISD::CTLZ);
1776  break;
1777  case Intrinsic::cttz:
1778  ISDs.push_back(ISD::CTTZ);
1779  break;
1780  case Intrinsic::bswap:
1781  ISDs.push_back(ISD::BSWAP);
1782  break;
1783  case Intrinsic::bitreverse:
1784  ISDs.push_back(ISD::BITREVERSE);
1785  break;
1786  }
1787 
1788  const TargetLoweringBase *TLI = getTLI();
1789  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1790 
1791  SmallVector<unsigned, 2> LegalCost;
1792  SmallVector<unsigned, 2> CustomCost;
1793  for (unsigned ISD : ISDs) {
1794  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1795  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1796  TLI->isFAbsFree(LT.second)) {
1797  return 0;
1798  }
1799 
1800  // The operation is legal. Assume it costs 1.
1801  // If the type is split to multiple registers, assume that there is some
1802  // overhead to this.
1803  // TODO: Once we have extract/insert subvector cost we need to use them.
1804  if (LT.first > 1)
1805  LegalCost.push_back(LT.first * 2);
1806  else
1807  LegalCost.push_back(LT.first * 1);
1808  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1809  // If the operation is custom lowered then assume
1810  // that the code is twice as expensive.
1811  CustomCost.push_back(LT.first * 2);
1812  }
1813  }
1814 
1815  auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1816  if (MinLegalCostI != LegalCost.end())
1817  return *MinLegalCostI;
1818 
1819  auto MinCustomCostI =
1820  std::min_element(CustomCost.begin(), CustomCost.end());
1821  if (MinCustomCostI != CustomCost.end())
1822  return *MinCustomCostI;
1823 
1824  // If we can't lower fmuladd into an FMA, estimate the cost as a floating
1825  // point mul followed by an add.
1826  if (IID == Intrinsic::fmuladd)
1827  return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
1828  CostKind) +
1829  thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
1830  CostKind);
1831  if (IID == Intrinsic::experimental_constrained_fmuladd) {
1832  IntrinsicCostAttributes FMulAttrs(
1833  Intrinsic::experimental_constrained_fmul, RetTy, Tys);
1834  IntrinsicCostAttributes FAddAttrs(
1835  Intrinsic::experimental_constrained_fadd, RetTy, Tys);
1836  return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
1837  thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
1838  }
1839 
1840  // Else, assume that we need to scalarize this intrinsic. For math builtins
1841  // this will emit a costly libcall, adding call overhead and spills. Make it
1842  // very expensive.
1843  if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1844  // Scalable vectors cannot be scalarized, so return Invalid.
1845  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
1846  return isa<ScalableVectorType>(Ty);
1847  }))
1848  return InstructionCost::getInvalid();
1849 
1850  unsigned ScalarizationCost = SkipScalarizationCost ?
1851  ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false);
1852 
1853  unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
1854  SmallVector<Type *, 4> ScalarTys;
1855  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1856  Type *Ty = Tys[i];
1857  if (Ty->isVectorTy())
1858  Ty = Ty->getScalarType();
1859  ScalarTys.push_back(Ty);
1860  }
1861  IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
1862  InstructionCost ScalarCost =
1863  thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1864  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1865  if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
1866  if (!ICA.skipScalarizationCost())
1867  ScalarizationCost += getScalarizationOverhead(VTy, false, true);
1868  ScalarCalls = std::max(ScalarCalls,
1869  cast<FixedVectorType>(VTy)->getNumElements());
1870  }
1871  }
1872  return ScalarCalls * ScalarCost + ScalarizationCost;
1873  }
1874 
1875  // This is going to be turned into a library call, make it expensive.
1876  return SingleCallCost;
1877  }
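// Usage sketch (editorial; VecTy and TTIObj are hypothetical): a query
// against this hook would typically be phrased as
//   IntrinsicCostAttributes Attrs(Intrinsic::smax, VecTy, {VecTy, VecTy});
//   InstructionCost C = TTIObj.getIntrinsicInstrCost(
//       Attrs, TargetTransformInfo::TCK_RecipThroughput);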
1878 
1879  /// Compute the cost of the given call instruction.
1880  ///
1881  /// Compute the cost of calling function F with return type RetTy and
1882  /// argument types Tys. F might be nullptr, in this case the cost of an
1883  /// arbitrary call with the specified signature will be returned.
1884  /// This is used, for instance, when we estimate call of a vector
1885  /// counterpart of the given function.
1886  /// \param F Called function, might be nullptr.
1887  /// \param RetTy Return value type.
1888  /// \param Tys Argument types.
1889  /// \returns The cost of Call instruction.
1890  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1891  TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
1892  return 10;
1893  }
1894 
1895  unsigned getNumberOfParts(Type *Tp) {
1896  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1897  return LT.first;
1898  }
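// Example (editorial): on a target with 128-bit vector registers,
// getNumberOfParts(<8 x i32>) returns 2, since type legalization splits the
// value into two <4 x i32> registers (LT.first is the split factor).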
1899 
1900  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1901  const SCEV *) {
1902  return 0;
1903  }
1904 
1905  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1906  /// We're assuming that reduction operations are performed in the following way:
1907  /// 1. Non-pairwise reduction
1908  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1909  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n-1, i32 undef, ..., i32 undef>
1910  /// \----------------v-------------/ \----------v------------/
1911  /// n/2 elements n/2 elements
1912  /// %red1 = op <n x t> %val, <n x t> %val1
1913  /// After this operation we have a vector %red1 where only the first n/2
1914  /// elements are meaningful, the second n/2 elements are undefined and can be
1915  /// dropped. All other operations are actually working with the vector of
1916  /// length n/2, not n, though the real vector length is still n.
1917  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1918  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2-1, i32 undef, ..., i32 undef>
1919  /// \----------------v-------------/ \----------v------------/
1920  /// n/4 elements 3*n/4 elements
1921  /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of
1922  /// length n/2, the resulting vector has length n/4 etc.
1923  /// 2. Pairwise reduction:
1924  /// Everything is the same except for an additional shuffle operation which
1925  /// is used to produce operands for pairwise kind of reductions.
1926  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1927  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1928  /// \-------------v----------/ \----------v------------/
1929  /// n/2 elements n/2 elements
1930  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1931  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1932  /// \-------------v----------/ \----------v------------/
1933  /// n/2 elements n/2 elements
1934  /// %red1 = op <n x t> %val1, <n x t> %val2
1935  /// Again, the operation is performed on <n x t> vector, but the resulting
1936  /// vector %red1 is <n/2 x t> vector.
1937  ///
1938  /// The cost model should take into account that the actual length of the
1939  /// vector is reduced on each iteration.
1940  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1941  bool IsPairwise,
1942  TTI::TargetCostKind CostKind) {
1943  Type *ScalarTy = Ty->getElementType();
1944  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
1945  if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
1946  ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
1947  NumVecElts >= 2) {
1948  // Or reduction for i1 is represented as:
1949  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
1950  // %res = cmp ne iReduxWidth %val, 0
1951  // And reduction for i1 is represented as:
1952  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
1953  // %res = cmp eq iReduxWidth %val, -1 (all ones)
1954  Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
1955  return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
1956  TTI::CastContextHint::None, CostKind) +
1957  thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
1958  CmpInst::makeCmpResultType(ValTy),
1959  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1960  }
1961  unsigned NumReduxLevels = Log2_32(NumVecElts);
1962  unsigned ArithCost = 0;
1963  unsigned ShuffleCost = 0;
1964  std::pair<unsigned, MVT> LT =
1965  thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
1966  unsigned LongVectorCount = 0;
1967  unsigned MVTLen =
1968  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1969  while (NumVecElts > MVTLen) {
1970  NumVecElts /= 2;
1971  VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
1972  // Assume the pairwise shuffles add a cost.
1973  ShuffleCost += (IsPairwise + 1) *
1974  thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
1975  NumVecElts, SubTy);
1976  ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
1977  Ty = SubTy;
1978  ++LongVectorCount;
1979  }
1980 
1981  NumReduxLevels -= LongVectorCount;
1982 
1983  // The minimal length of the vector is limited by the real length of vector
1984  // operations performed on the current platform. That's why several final
1985  // reduction operations are performed on the vectors with the same
1986  // architecture-dependent length.
1987 
1988  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1989  // reductions need two shuffles on every level but the last; on that level
1990  // one of the shuffles is <0, u, u, ...>, which is the identity.
1991  unsigned NumShuffles = NumReduxLevels;
1992  if (IsPairwise && NumReduxLevels >= 1)
1993  NumShuffles += NumReduxLevels - 1;
1994  ShuffleCost += NumShuffles * thisT()->getShuffleCost(
1995  TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
1996  ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
1997  return ShuffleCost + ArithCost +
1998  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1999  }
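// Worked example (editorial, unit costs, non-pairwise add of <8 x i32> with
// legal <4 x i32>): one extract-subvector shuffle + one <4 x i32> add from
// the splitting loop, then 2 remaining levels (2 shuffles + 2 adds), plus
// the final extractelement of lane 0: 1 + 1 + 2 + 2 + 1 = 7.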
2000 
2001  /// Try to calculate op costs for min/max reduction operations.
2002  /// \param CondTy Conditional type for the Select instruction.
2003  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
2004  bool IsPairwise, bool IsUnsigned,
2005  TTI::TargetCostKind CostKind) {
2006  Type *ScalarTy = Ty->getElementType();
2007  Type *ScalarCondTy = CondTy->getElementType();
2008  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2009  unsigned NumReduxLevels = Log2_32(NumVecElts);
2010  unsigned CmpOpcode;
2011  if (Ty->isFPOrFPVectorTy()) {
2012  CmpOpcode = Instruction::FCmp;
2013  } else {
2014  assert(Ty->isIntOrIntVectorTy() &&
2015  "expecting floating point or integer type for min/max reduction");
2016  CmpOpcode = Instruction::ICmp;
2017  }
2018  InstructionCost MinMaxCost = 0;
2019  unsigned ShuffleCost = 0;
2020  std::pair<unsigned, MVT> LT =
2021  thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
2022  unsigned LongVectorCount = 0;
2023  unsigned MVTLen =
2024  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2025  while (NumVecElts > MVTLen) {
2026  NumVecElts /= 2;
2027  auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2028  CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
2029 
2030  // Assume the pairwise shuffles add a cost.
2031  ShuffleCost += (IsPairwise + 1) *
2032  thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
2033  NumVecElts, SubTy);
2034  MinMaxCost +=
2035  thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
2036  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2037  thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
2038  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2039  Ty = SubTy;
2040  ++LongVectorCount;
2041  }
2042 
2043  NumReduxLevels -= LongVectorCount;
2044 
2045  // The minimal length of the vector is limited by the real length of vector
2046  // operations performed on the current platform. That's why several final
2047  // reduction operations are performed on the vectors with the same
2048  // architecture-dependent length.
2049 
2050  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
2051  // reductions need two shuffles on every level but the last; on that level
2052  // one of the shuffles is <0, u, u, ...>, which is the identity.
2053  unsigned NumShuffles = NumReduxLevels;
2054  if (IsPairwise && NumReduxLevels >= 1)
2055  NumShuffles += NumReduxLevels - 1;
2056  ShuffleCost += NumShuffles * thisT()->getShuffleCost(
2057  TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
2058  MinMaxCost +=
2059  NumReduxLevels *
2060  (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
2061  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2062  thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
2063  CmpInst::BAD_ICMP_PREDICATE, CostKind));
2064  // The last min/max should be in vector registers and we counted it above.
2065  // So just need a single extractelement.
2066  return ShuffleCost + MinMaxCost +
2067  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2068  }
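// Worked example (editorial, unit costs): an smax reduction of <4 x i32>
// with a legal <4 x i32> register skips the splitting loop and costs
//   2 shuffles + 2 x (icmp + select) + 1 extractelement = 7.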
2069 
2070  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
2071  Type *ResTy, VectorType *Ty,
2072  TTI::TargetCostKind CostKind) {
2073  // Without any native support, this is equivalent to the cost of
2074  // vecreduce.add(ext) or, if IsMLA, vecreduce.add(mul(ext, ext))
2075  VectorType *ExtTy = VectorType::get(ResTy, Ty);
2076  InstructionCost RedCost = thisT()->getArithmeticReductionCost(
2077  Instruction::Add, ExtTy, false, CostKind);
2078  InstructionCost MulCost = 0;
2079  InstructionCost ExtCost = thisT()->getCastInstrCost(
2080  IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2081  TTI::CastContextHint::None, CostKind);
2082  if (IsMLA) {
2083  MulCost =
2084  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2085  ExtCost *= 2;
2086  }
2087 
2088  return RedCost + MulCost + ExtCost;
2089  }
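// Worked example (editorial): with IsMLA, vecreduce.add(mul(ext(a), ext(b)))
// is priced as RedCost(add over ExtTy) + MulCost + 2 * ExtCost, while the
// plain vecreduce.add(ext(a)) form pays RedCost + ExtCost only.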
2090 
2091  unsigned getVectorSplitCost() { return 1; }
2092 
2093  /// @}
2094 };
2095 
2096 /// Concrete BasicTTIImpl that can be used if no further customization
2097 /// is needed.
2098 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
2099  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
2100 
2101  friend BaseT;
2102 
2103  const TargetSubtargetInfo *ST;
2104  const TargetLoweringBase *TLI;
2105 
2106  const TargetSubtargetInfo *getST() const { return ST; }
2107  const TargetLoweringBase *getTLI() const { return TLI; }
2108 
2109 public:
2110  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
2111 };
2112 
2113 } // end namespace llvm
2114 
2115 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
llvm::MCSubtargetInfo::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: MCSubtargetInfo.cpp:359
llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition: ISDOpcodes.h:869
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:26
llvm::EngineKind::Kind
Kind
Definition: ExecutionEngine.h:524
ValueTypes.h
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:473
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:452
llvm::BasicTTIImplBase::getArithmeticInstrCost
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:688
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:848
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:480
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:211
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1014
llvm::TargetTransformInfoImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Definition: TargetTransformInfoImpl.h:151
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:264
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1285
MathExtras.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:448
llvm
Definition: AllocatorList.h:23
llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition: TargetLowering.h:188
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:201
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::BasicTTIImplBase::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:560
llvm::BasicTTIImplBase::getCFInstrCost
unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:926
llvm::SystemZISD::TM
@ TM
Definition: SystemZISelLowering.h:65
llvm::TargetTransformInfoImplCRTPBase::getGEPCost
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency)
Definition: TargetTransformInfoImpl.h:845
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:193
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::TargetTransformInfoImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Definition: TargetTransformInfoImpl.h:157
llvm::BasicTTIImplBase::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:429
llvm::BasicTTIImplBase::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:294
llvm::BasicTTIImplBase::isAlwaysUniform
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:216
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1282
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:722
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:931
llvm::Type::isPointerTy
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:229
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:386
llvm::BasicTTIImplBase::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:245
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:883
llvm::BasicTTIImplBase::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: BasicTTIImpl.h:228
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:162
llvm::Function
Definition: Function.h:61
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:529
llvm::ISD::PRE_DEC
@ PRE_DEC
Definition: ISDOpcodes.h:1285
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:380
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:651
llvm::BasicTTIImplBase::isTypeLegal
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:304
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:693
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:317
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:28
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:535
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:206
llvm::BasicTTIImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
Definition: BasicTTIImpl.h:522
llvm::BasicTTIImplBase::getOperandsScalarizationOverhead
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:643
ErrorHandling.h
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1034
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:693
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:148
llvm::BasicTTIImplBase::BasicTTIImplBase
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:193
llvm::BasicTTIImplBase::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:202
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:443
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:147
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:860
llvm::BasicTTIImplBase::getFPOpCost
unsigned getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:419
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:460
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:190
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:214
llvm::TargetLoweringBase::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetLowering.h:2361
APInt.h
llvm::IntrinsicCostAttributes::getScalarizationCost
unsigned getScalarizationCost() const
Definition: TargetTransformInfo.h:150
llvm::BasicTTIImplBase::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:415
llvm::BasicTTIImplBase::~BasicTTIImplBase
virtual ~BasicTTIImplBase()=default
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:476
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:46
llvm::TargetLoweringBase::getTruncStoreAction
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
Definition: TargetLowering.h:1238
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1581
llvm::TargetLoweringBase::isIndexedLoadLegal
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
Definition: TargetLowering.h:1269
llvm::BasicTTIImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:272
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:529
llvm::TargetLoweringBase::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I) const
Definition: TargetLowering.h:2489
llvm::TargetLoweringBase::isSuitableForJumpTable
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
Definition: TargetLoweringBase.cpp:1592
llvm::BasicTTIImplBase::getInstructionLatency
InstructionCost getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:546
llvm::Optional
Definition: APInt.h:33
llvm::BasicTTIImplBase::isIndexedStoreLegal
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:266
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::BasicTTIImplBase::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Definition: BasicTTIImpl.h:578
llvm::BasicTTIImplBase::getGEPCost
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands)
Definition: BasicTTIImpl.h:313
llvm::SmallPtrSet< const BasicBlock *, 4 >
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:903
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:424
llvm::TargetTransformInfoImplBase::getArithmeticInstrCost
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
Definition: TargetTransformInfoImpl.h:444
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1316
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:480
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:492
llvm::BasicTTIImplBase::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: BasicTTIImpl.h:599
llvm::TargetLoweringBase::LegalizeAction
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
Definition: TargetLowering.h:192
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
llvm::TargetLoweringBase::isCheapToSpeculateCtlz
virtual bool isCheapToSpeculateCtlz() const
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition: TargetLowering.h:595
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:527
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::BasicTTIImplBase::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsPairwise, bool IsUnsigned, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
Definition: BasicTTIImpl.h:2003
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:856
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:202
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:299
llvm::BasicTTIImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: BasicTTIImpl.h:276
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2183
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:965
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:852
llvm::BasicTTIImplBase::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:214
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:846
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::LinearPolySize< TypeSize >::isKnownLT
static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:329
TargetTransformInfoImpl.h
llvm::Triple::isArch64Bit
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1336
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:158
llvm::BasicTTIImplBase::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Definition: BasicTTIImpl.h:236
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:149
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
MachineValueType.h
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:927
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:247
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::ElementCount::isScalar
bool isScalar() const
Counting predicates.
Definition: TypeSize.h:396
llvm::TargetTransformInfoImplBase::getDataLayout
const DataLayout & getDataLayout() const
Definition: TargetTransformInfoImpl.h:48
llvm::BasicTTIImplBase::getScalarizationOverhead
unsigned getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
Definition: BasicTTIImpl.h:670
llvm::BasicTTIImplBase::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Definition: BasicTTIImpl.h:574
llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:414
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:870
llvm::TargetLoweringBase::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetLowering.h:2368
Instruction.h
llvm::TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Definition: TargetTransformInfoImpl.h:213
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:570
TargetLowering.h
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:152
llvm::MCSubtargetInfo::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Return the maximum prefetch distance in terms of loop iterations.
Definition: MCSubtargetInfo.cpp:355
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1247
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:854
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:542
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1100
llvm::MCSubtargetInfo::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Return the preferred prefetch distance in terms of instructions.
Definition: MCSubtargetInfo.cpp:351
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:653
llvm::TargetTransformInfoImplBase
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:34
llvm::BasicTTIImplBase::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB)
Definition: BasicTTIImpl.h:430
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:664
llvm::TargetTransformInfoImplBase::getCacheAssociativity
llvm::Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: TargetTransformInfoImpl.h:422
Constants.h
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:903
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:845
llvm::BasicTTIImplBase::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis()
Definition: BasicTTIImpl.h:212
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::BasicTTIImplBase::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: BasicTTIImpl.h:590
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1079
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2324
llvm::BitVector::count
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:173
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
llvm::BasicTTIImplBase::getFlatAddressSpace
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:218
InstrTypes.h
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
SI
@ SI
Definition: SIInstrInfo.cpp:7342
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:868
llvm::BasicTTIImplBase::getScalingFactorCost
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:284
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::BasicTTIImplBase::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:918
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
llvm::BasicTTIImplBase::getCacheSize
virtual Optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:554
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:119
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:597
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::TargetLoweringBase::isCheapToSpeculateCttz
virtual bool isCheapToSpeculateCttz() const
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition: TargetLowering.h:590
llvm::BasicTTIImplBase::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:686
llvm::BasicTTIImplBase::getCallInstrCost
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency)
Compute a cost of the given call instruction.
Definition: BasicTTIImpl.h:1890
llvm::BasicTTIImplBase::getRegUsageForType
unsigned getRegUsageForType(Type *Ty)
Definition: BasicTTIImpl.h:309
llvm::Instruction
Definition: Instruction.h:45
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1285
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:154
llvm::TargetLoweringBase::isLegalAddressingMode
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetLoweringBase.cpp:1889
llvm::TargetMachine::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: TargetMachine.h:312
llvm::BasicTTIImplBase::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent()
Definition: BasicTTIImpl.h:432
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:867
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:866
llvm::BasicTTIImplBase::getCacheLineSize
virtual unsigned getCacheLineSize() const
Definition: BasicTTIImpl.h:570
BitVector.h
llvm::TargetTransformInfoImplCRTPBase
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:835
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:74
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1285
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:650
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::getArch
ArchType getArch() const
getArch - Get the parsed architecture type of this triple.
Definition: Triple.h:307
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:931
llvm::PartialUnrollingThreshold
cl::opt< unsigned > PartialUnrollingThreshold
llvm::BasicTTIImplBase::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Definition: BasicTTIImpl.h:318
llvm::BasicTTIImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:768
llvm::None
const NoneType None
Definition: None.h:23
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:284
Type.h
llvm::TargetTransformInfoImplBase::getCFInstrCost
unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
Definition: TargetTransformInfoImpl.h:519
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:116
llvm::BasicTTIImplBase::getAddressComputationCost
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
Definition: BasicTTIImpl.h:1900
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::BasicTTIImplBase::shouldBuildLookupTables
bool shouldBuildLookupTables()
Definition: BasicTTIImpl.h:377
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1297
LoopInfo.h
llvm::TargetTransformInfoImplBase::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Definition: TargetTransformInfoImpl.h:164
llvm::TargetTransformInfoImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfoImpl.h:215
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1285
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool LookThroughBitCast, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:116
llvm::TargetTransformInfoImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: TargetTransformInfoImpl.h:180
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:858
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:371
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:847
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:896
BasicBlock.h
llvm::TargetLoweringBase::getScalingFactorCost
virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetLowering.h:2350
llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1224
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::BasicTTIImplBase::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:249
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:852
llvm::TargetMachine::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: TargetMachine.h:302
llvm::BasicTTIImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
Definition: BasicTTIImpl.h:527
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:861
llvm::BasicTTIImplBase::haveFastSqrt
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:408
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1286
llvm::BasicTTIImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: BasicTTIImpl.h:536
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::Type::getWithNewBitWidth
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
Definition: DerivedTypes.h:686
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:409
llvm::TargetLoweringBase::isLoadExtLegal
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Definition: TargetLowering.h:1224
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:881
llvm::TargetLoweringBase::getLoadExtAction
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Definition: TargetLowering.h:1212
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::BasicTTIImplBase::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, bool IsPairwise, TTI::TargetCostKind CostKind)
Try to calculate arithmetic and shuffle op costs for reduction operations.
Definition: BasicTTIImpl.h:1940
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:898
llvm::TargetLoweringBase::isSuitableForBitTests
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
Definition: TargetLowering.h:1174
llvm::BasicTTIImplBase::isIndexedLoadLegal
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:260
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::TargetLoweringBase::isOperationLegalOrPromote
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
Definition: TargetLowering.h:1114
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:58
llvm::TargetTransformInfoImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Definition: TargetTransformInfoImpl.h:174
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:423
llvm::EVT::getEVT
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:529
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::TargetTransformInfoImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:475
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:903
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:519
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:470
llvm::BasicTTIImplBase
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:76
ArrayRef.h
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:537
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1316
maximum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For maximum
Definition: README.txt:489
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
llvm::TargetLoweringBase::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetLowering.h:1609
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:39
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::BasicTTIImplBase::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:603
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:115
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:755
function
print Print MemDeps of function
Definition: MemDepPrinter.cpp:83
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::BasicTTIImplBase::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1074
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:30
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:771
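A hedged sketch of querying this interface; TTI and Ctx are an assumed TargetTransformInfo & and LLVMContext & supplied by the caller:

  // Cost of a zext i8 -> i32 under the reciprocal-throughput model.
  InstructionCost Cost = TTI.getCastInstrCost(
      Instruction::ZExt, Type::getInt32Ty(Ctx), Type::getInt8Ty(Ctx),
      TargetTransformInfo::CastContextHint::None,
      TargetTransformInfo::TCK_RecipThroughput);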
llvm::BasicTTIImpl::BasicTTIImpl
BasicTTIImpl(const TargetMachine *TM, const Function &F)
Definition: BasicTargetTransformInfo.cpp:32
llvm::ISD::POST_DEC
@ POST_DEC
Definition: ISDOpcodes.h:1285
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:895
llvm::ElementCount::isVector
bool isVector() const
One or more elements.
Definition: TypeSize.h:398
llvm::TargetSubtargetInfo::useAA
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
Definition: TargetSubtargetInfo.cpp:60
llvm::MCSubtargetInfo::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Return the minimum stride necessary to trigger software prefetching.
Definition: MCSubtargetInfo.cpp:363
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::BasicTTIImplBase::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Definition: BasicTTIImpl.h:586
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2173
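A small sketch contrasting signed and unsigned APInt ordering (the APIntOps::umax helper used here is the one listed further below):

  APInt A(8, 0x80);                   // bit pattern 0x80: -128 as signed i8
  APInt B(8, 0x01);                   // +1
  assert(APIntOps::smin(A, B) == A);  // -128 < 1 under signed comparison
  assert(APIntOps::umax(A, B) == A);  // 0x80 > 0x01 under unsigned comparison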
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:533
llvm::BasicTTIImplBase::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables()
Definition: BasicTTIImpl.h:383
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:897
llvm::BasicTTIImplBase::emitGetActiveLaneMask
bool emitGetActiveLaneMask()
Definition: BasicTTIImpl.h:518
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1079
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1285
llvm::APInt::getAllOnesValue
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:567
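A one-line sketch, e.g. for building a demanded-elements mask; NumElts is an assumed element count:

  APInt DemandedElts = APInt::getAllOnesValue(NumElts); // every lane demanded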
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
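A typical range-based use, assuming a Function &F in scope:

  bool HasVectorArg = llvm::any_of(F.args(), [](const Argument &A) {
    return A.getType()->isVectorTy();
  });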
DataLayout.h
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::TargetTransformInfo::TCK_SizeAndLatency
@ TCK_SizeAndLatency
The weighted sum of size and latency.
Definition: TargetTransformInfo.h:215
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::TargetTransformInfoImplCRTPBase::getInstructionLatency
InstructionCost getInstructionLatency(const Instruction *I)
Definition: TargetTransformInfoImpl.h:1130
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2322
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
TargetSubtargetInfo.h
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:862
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1731
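A sketch of the mapping, with TLI an assumed const TargetLoweringBase *:

  int ISDOpc = TLI->InstructionOpcodeToISD(Instruction::FAdd);
  assert(ISDOpc == ISD::FADD && "IR fadd corresponds to the FADD node");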
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:232
llvm::TargetTransformInfoImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:528
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:863
llvm::BasicTTIImplBase::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:496
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1283
llvm::VectorType::getHalfElementsVectorType
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
Definition: DerivedTypes.h:495
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:419
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:896
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:281
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:851
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:59
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1316
llvm::BasicTTIImplBase::useAA
bool useAA() const
Definition: BasicTTIImpl.h:302
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:92
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1284
llvm::TargetLoweringBase::isFreeAddrSpaceCast
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. ...
Definition: TargetLoweringBase.cpp:918
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:853
llvm::BasicTTIImplBase::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:434
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:163
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:335
llvm::BasicTTIImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: BasicTTIImpl.h:504
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2188
Constant.h
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2323
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1286
llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition: TargetLowering.h:2679
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:386
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:884
llvm::BasicTTIImpl
Concrete BasicTTIImpl that can be used if no further customization is needed.
Definition: BasicTTIImpl.h:2098
llvm::KnownBits
Definition: KnownBits.h:23
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2325
llvm::TargetLoweringBase::isIndexedStoreLegal
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
Definition: TargetLowering.h:1283
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:490
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:856
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:864
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:208
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:855
ISDOpcodes.h
llvm::TypeSize
Definition: TypeSize.h:417
llvm::MCSchedModel::DefaultLoadLatency
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:287
Casting.h
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:197
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1199
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::BasicTTIImplBase::isProfitableToHoist
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:298
llvm::BasicTTIImplBase::isLegalAddImmediate
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:241
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::Function::isTargetIntrinsic
bool isTargetIntrinsic() const
isTargetIntrinsic - Returns true if this function is an intrinsic and the intrinsic is specific to a ...
Definition: Function.cpp:676
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:841
llvm::BasicTTIImplBase::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Definition: BasicTTIImpl.h:223
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:184
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfoImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: TargetTransformInfoImpl.h:168
llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1128
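A sketch of a legality query (TLI assumed in scope; EVT converts implicitly from MVT):

  // Can the target select, custom-lower, or promote a single-precision sqrt?
  bool HasFSqrt =
      TLI->isOperationLegalOrCustomOrPromote(ISD::FSQRT, MVT::f32);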
llvm::TargetTransformInfoImplBase::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:38
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:45
llvm::HardwareLoopInfo
Attributes of a target dependent hardware loop.
Definition: TargetTransformInfo.h:93
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:54
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: TargetLowering.h:2472
llvm::TargetLoweringBase::getTypeLegalizationCost
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: TargetLoweringBase.cpp:1811
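A sketch of how cost code typically consumes the returned pair (TLI, DL, and Ty assumed in scope):

  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  // LT.first scales the cost by how many pieces the type splits into;
  // LT.second is the machine value type those pieces end up as.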
llvm::BasicTTIImplBase::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1028
Instructions.h
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:146
llvm::TargetLoweringBase::areJTsAllowed
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
Definition: TargetLowering.h:1146
SmallVector.h
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:924
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:195
llvm::BasicTTIImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1226
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:655
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:652
llvm::BasicTTIImplBase::getNumberOfParts
unsigned getNumberOfParts(Type *Tp)
Definition: BasicTTIImpl.h:1895
TargetTransformInfo.h
llvm::BasicTTIImplBase::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:985
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2321
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2558
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1287
llvm::SmallVectorImpl< int >
llvm::BasicTTIImplBase::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:232
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1164
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1294
DerivedTypes.h
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:276
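A minimal sketch, with Ctx an assumed LLVMContext &:

  IntegerType *I128 = IntegerType::get(Ctx, 128); // arbitrary width: i128
  IntegerType *I1 = IntegerType::get(Ctx, 1);     // i1, e.g. for mask lanes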
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:445
llvm::BasicTTIImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)
Definition: BasicTTIImpl.h:511
llvm::BasicTTIImplBase::getScalarizationOverhead
unsigned getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:608
llvm::BasicTTIImplBase::getTypeBasedIntrinsicInstrCost
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:1401
llvm::MCSubtargetInfo::getCacheLineSize
virtual Optional< unsigned > getCacheLineSize(unsigned Level) const
Return the target cache line size in bytes at a given level.
Definition: MCSubtargetInfo.cpp:347
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:657
llvm::BasicTTIImplBase::getShuffleCost
unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp)
Definition: BasicTTIImpl.h:746
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:263
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3149
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::BasicTTIImplBase::getVectorSplitCost
unsigned getVectorSplitCost()
Definition: BasicTTIImpl.h:2091
llvm::BasicTTIImplBase::getScalarizationOverhead
unsigned getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract)
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
Definition: BasicTTIImpl.h:632
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1382
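A sketch chaining this with a legality query (TLI, DL, and Ty assumed in scope):

  EVT VT = TLI->getValueType(DL, Ty);
  bool AddLegal = TLI->isOperationLegalOrCustomOrPromote(ISD::ADD, VT);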
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:154
llvm::MCSubtargetInfo::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(unsigned Level) const
Return the cache associativity for the given level of cache.
Definition: MCSubtargetInfo.cpp:343
llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition: TargetLowering.h:209
llvm::TargetTransformInfoImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:568
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:865
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:899
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1272
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:477
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:132
llvm::BasicTTIImplBase::hasBranchDivergence
bool hasBranchDivergence()
Definition: BasicTTIImpl.h:210
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:859
llvm::BasicTTIImplBase::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2070
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:346
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:212
llvm::BasicTTIImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I)
Definition: BasicTTIImpl.h:280
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:151
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:634
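A sketch of fixed and scalable construction (Ctx assumed in scope):

  VectorType *V4F32 =
      VectorType::get(Type::getFloatTy(Ctx), ElementCount::getFixed(4));
  VectorType *NxV2I64 =
      VectorType::get(Type::getInt64Ty(Ctx), ElementCount::getScalable(2));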
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:853
llvm::Triple::aarch64
@ aarch64
Definition: Triple.h:52
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2178
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:654
llvm::codeview::PublicSymFlags::Function
@ Function
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
Definition: TargetLowering.h:915
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:129
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
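A sketch of the common visited-set idiom (Worklist is an assumed range of const Value *):

  SmallPtrSet<const Value *, 8> Visited;
  for (const Value *V : Worklist)
    if (!Visited.insert(V).second)
      continue; // second == false: V was already present; skip duplicates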
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
llvm::BasicTTIImplBase::getVectorInstrCost
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:978
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:673