1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides a helper that implements much of the TTI interface in
11 /// terms of the target-independent code generator and TargetLowering
12 /// interfaces.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
18 
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Analysis/TargetTransformInfoImpl.h"
27 #include "llvm/CodeGen/ISDOpcodes.h"
28 #include "llvm/CodeGen/TargetLowering.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallSite.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Operator.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/MC/MCSchedule.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <limits>
54 #include <utility>
55 
56 namespace llvm {
57 
58 class Function;
59 class GlobalValue;
60 class LLVMContext;
61 class ScalarEvolution;
62 class SCEV;
63 class TargetMachine;
64 
65 extern cl::opt<unsigned> PartialUnrollingThreshold;
66 
67 /// Base class which can be used to help build a TTI implementation.
68 ///
69 /// This class provides as much implementation of the TTI interface as is
70 /// possible using the target independent parts of the code generator.
71 ///
72 /// In order to subclass it, your class must implement a getST() method to
73 /// return the subtarget, and a getTLI() method to return the target lowering.
74 /// We need these methods implemented in the derived class so that this class
75 /// doesn't have to duplicate storage for them.
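///
/// A minimal sketch of such a derived class (the target names here are
/// illustrative only, not part of LLVM):
///
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     const MyTargetSubtarget *ST;   // owned by the TargetMachine
///     const MyTargetLowering *TLI;
///   public:
///     const MyTargetSubtarget *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };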
76 template <typename T>
77 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78 private:
79  using BaseT = TargetTransformInfoImplCRTPBase<T>;
80  using TTI = TargetTransformInfo;
81 
82  /// Estimate a cost of Broadcast as an extract and sequence of insert
83  /// operations.
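 /// For example, assuming unit insert/extract costs, broadcasting a
 /// <4 x float> would be modelled as one extractelement from lane 0 plus
 /// four insertelements, i.e. a cost of 5.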
84  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85  assert(Ty->isVectorTy() && "Can only shuffle vectors");
86  unsigned Cost = 0;
87  // Broadcast cost is equal to the cost of extracting the zero'th element
88  // plus the cost of inserting it into every element of the result vector.
89  Cost += static_cast<T *>(this)->getVectorInstrCost(
90  Instruction::ExtractElement, Ty, 0);
91 
92  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93  Cost += static_cast<T *>(this)->getVectorInstrCost(
94  Instruction::InsertElement, Ty, i);
95  }
96  return Cost;
97  }
98 
99  /// Estimate a cost of shuffle as a sequence of extract and insert
100  /// operations.
101  unsigned getPermuteShuffleOverhead(Type *Ty) {
102  assert(Ty->isVectorTy() && "Can only shuffle vectors");
103  unsigned Cost = 0;
104  // Shuffle cost is equal to the cost of extracting each element from its
105  // source vector plus the cost of inserting it into the result vector.
106 
107  // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108  // index 0 of first vector, index 1 of second vector,index 2 of first
109  // vector and finally index 3 of second vector and insert them at index
110  // <0,1,2,3> of result vector.
111  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112  Cost += static_cast<T *>(this)
113  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114  Cost += static_cast<T *>(this)
115  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116  }
117  return Cost;
118  }
119 
120  /// Estimate a cost of subvector extraction as a sequence of extract and
121  /// insert operations.
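 /// For example, extracting the upper <2 x i32> half of a <4 x i32> vector
 /// (Index == 2) would be modelled as extractelements from lanes 2 and 3 of
 /// the source plus insertelements into lanes 0 and 1 of the result subvector.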
122  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124  "Can only extract subvectors from vectors");
125  int NumSubElts = SubTy->getVectorNumElements();
126  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127  "SK_ExtractSubvector index out of range");
128 
129  unsigned Cost = 0;
130  // Subvector extraction cost is equal to the cost of extracting the elements
131  // from the source vector plus the cost of inserting them into the result
132  // subvector.
133  for (int i = 0; i != NumSubElts; ++i) {
134  Cost += static_cast<T *>(this)->getVectorInstrCost(
135  Instruction::ExtractElement, Ty, i + Index);
136  Cost += static_cast<T *>(this)->getVectorInstrCost(
137  Instruction::InsertElement, SubTy, i);
138  }
139  return Cost;
140  }
141 
142  /// Estimate a cost of subvector insertion as a sequence of extract and
143  /// insert operations.
144  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146  "Can only insert subvectors into vectors");
147  int NumSubElts = SubTy->getVectorNumElements();
148  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149  "SK_InsertSubvector index out of range");
150 
151  unsigned Cost = 0;
152  // Subvector insertion cost is equal to the cost of extracting the elements
153  // from the subvector plus the cost of inserting them into the result
154  // vector.
155  for (int i = 0; i != NumSubElts; ++i) {
156  Cost += static_cast<T *>(this)->getVectorInstrCost(
157  Instruction::ExtractElement, SubTy, i);
158  Cost += static_cast<T *>(this)->getVectorInstrCost(
159  Instruction::InsertElement, Ty, i + Index);
160  }
161  return Cost;
162  }
163 
164  /// Local query method delegates up to T which *must* implement this!
165  const TargetSubtargetInfo *getST() const {
166  return static_cast<const T *>(this)->getST();
167  }
168 
169  /// Local query method delegates up to T which *must* implement this!
170  const TargetLoweringBase *getTLI() const {
171  return static_cast<const T *>(this)->getTLI();
172  }
173 
174  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175  switch (M) {
176  case TTI::MIM_Unindexed:
177  return ISD::UNINDEXED;
178  case TTI::MIM_PreInc:
179  return ISD::PRE_INC;
180  case TTI::MIM_PreDec:
181  return ISD::PRE_DEC;
182  case TTI::MIM_PostInc:
183  return ISD::POST_INC;
184  case TTI::MIM_PostDec:
185  return ISD::POST_DEC;
186  }
187  llvm_unreachable("Unexpected MemIndexedMode");
188  }
189 
190 protected:
191  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192  : BaseT(DL) {}
193  virtual ~BasicTTIImplBase() = default;
194 
195  using TargetTransformInfoImplBase::DL;
196 
197 public:
198  /// \name Scalar TTI Implementations
199  /// @{
200  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201  unsigned AddressSpace, unsigned Alignment,
202  bool *Fast) const {
203  EVT E = EVT::getIntegerVT(Context, BitWidth);
204  return getTLI()->allowsMisalignedMemoryAccesses(
205  E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206  }
207 
208  bool hasBranchDivergence() { return false; }
209 
210  bool isSourceOfDivergence(const Value *V) { return false; }
211 
212  bool isAlwaysUniform(const Value *V) { return false; }
213 
214  unsigned getFlatAddressSpace() {
215  // Return an invalid address space.
216  return -1;
217  }
218 
219  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
220  Intrinsic::ID IID) const {
221  return false;
222  }
223 
224  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
225  Value *OldV, Value *NewV) const {
226  return false;
227  }
228 
229  bool isLegalAddImmediate(int64_t imm) {
230  return getTLI()->isLegalAddImmediate(imm);
231  }
232 
233  bool isLegalICmpImmediate(int64_t imm) {
234  return getTLI()->isLegalICmpImmediate(imm);
235  }
236 
237  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
238  bool HasBaseReg, int64_t Scale,
239  unsigned AddrSpace, Instruction *I = nullptr) {
240  TargetLoweringBase::AddrMode AM;
241  AM.BaseGV = BaseGV;
242  AM.BaseOffs = BaseOffset;
243  AM.HasBaseReg = HasBaseReg;
244  AM.Scale = Scale;
245  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
246  }
247 
248  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
249  const DataLayout &DL) const {
250  EVT VT = getTLI()->getValueType(DL, Ty);
251  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
252  }
253 
254  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
255  const DataLayout &DL) const {
256  EVT VT = getTLI()->getValueType(DL, Ty);
257  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
258  }
259 
260  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
261  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
262  }
263 
264  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
265  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
266  TargetLoweringBase::AddrMode AM;
267  AM.BaseGV = BaseGV;
268  AM.BaseOffs = BaseOffset;
269  AM.HasBaseReg = HasBaseReg;
270  AM.Scale = Scale;
271  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
272  }
273 
274  bool isTruncateFree(Type *Ty1, Type *Ty2) {
275  return getTLI()->isTruncateFree(Ty1, Ty2);
276  }
277 
278  bool isProfitableToHoist(Instruction *I) {
279  return getTLI()->isProfitableToHoist(I);
280  }
281 
282  bool useAA() const { return getST()->useAA(); }
283 
284  bool isTypeLegal(Type *Ty) {
285  EVT VT = getTLI()->getValueType(DL, Ty);
286  return getTLI()->isTypeLegal(VT);
287  }
288 
289  int getGEPCost(Type *PointeeType, const Value *Ptr,
290  ArrayRef<const Value *> Operands) {
291  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
292  }
293 
294  int getExtCost(const Instruction *I, const Value *Src) {
295  if (getTLI()->isExtFree(I))
296  return TargetTransformInfo::TCC_Free;
297 
298  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
299  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
300  if (getTLI()->isExtLoad(LI, I, DL))
301  return TargetTransformInfo::TCC_Free;
302 
303  return TargetTransformInfo::TCC_Basic;
304  }
305 
306  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
307  ArrayRef<const Value *> Arguments, const User *U) {
308  return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
309  }
310 
311  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
312  ArrayRef<Type *> ParamTys, const User *U) {
313  if (IID == Intrinsic::cttz) {
314  if (getTLI()->isCheapToSpeculateCttz())
315  return TargetTransformInfo::TCC_Basic;
316  return TargetTransformInfo::TCC_Expensive;
317  }
318 
319  if (IID == Intrinsic::ctlz) {
320  if (getTLI()->isCheapToSpeculateCtlz())
321  return TargetTransformInfo::TCC_Basic;
322  return TargetTransformInfo::TCC_Expensive;
323  }
324 
325  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
326  }
327 
328  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
329  unsigned &JumpTableSize) {
330  /// Try to find the estimated number of clusters. Note that the number of
331  /// clusters identified in this function could be different from the actual
332  /// clusters identified in this function could be different from the actual
333  /// numbers found in lowering. This function ignores switches that are
334  /// lowered with a mix of jump table / bit test / BTree. This function was
335  /// initially intended to be used when estimating the cost of a switch in the
336  /// inline cost heuristic, but it's a generic cost model to be used in other
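 /// places (e.g., in loop unrolling).
 ///
 /// For example, assuming the target allows jump tables and its minimum jump
 /// table size is no more than four entries, a switch over the contiguous
 /// cases 0, 1, 2 and 3 would be counted as a single cluster with
 /// JumpTableSize == 4, whereas cases too sparse for either a jump table or a
 /// bit test would each count as their own cluster.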
337  unsigned N = SI.getNumCases();
338  const TargetLoweringBase *TLI = getTLI();
339  const DataLayout &DL = this->getDataLayout();
340 
341  JumpTableSize = 0;
342  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
343 
344  // Early exit if neither a jump table nor a bit test is allowed.
345  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
346  return N;
347 
348  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
349  APInt MinCaseVal = MaxCaseVal;
350  for (auto CI : SI.cases()) {
351  const APInt &CaseVal = CI.getCaseValue()->getValue();
352  if (CaseVal.sgt(MaxCaseVal))
353  MaxCaseVal = CaseVal;
354  if (CaseVal.slt(MinCaseVal))
355  MinCaseVal = CaseVal;
356  }
357 
358  // Check if suitable for a bit test
359  if (N <= DL.getIndexSizeInBits(0u)) {
360  SmallPtrSet<const BasicBlock *, 4> Dests;
361  for (auto I : SI.cases())
362  Dests.insert(I.getCaseSuccessor());
363 
364  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
365  DL))
366  return 1;
367  }
368 
369  // Check if suitable for a jump table.
370  if (IsJTAllowed) {
371  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
372  return N;
373  uint64_t Range =
374  (MaxCaseVal - MinCaseVal)
375  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
376  // Check whether a range of clusters is dense enough for a jump table
377  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
378  JumpTableSize = Range;
379  return 1;
380  }
381  }
382  return N;
383  }
384 
385  bool shouldBuildLookupTables() {
386  const TargetLoweringBase *TLI = getTLI();
387  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
388  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
389  }
390 
391  bool haveFastSqrt(Type *Ty) {
392  const TargetLoweringBase *TLI = getTLI();
393  EVT VT = TLI->getValueType(DL, Ty);
394  return TLI->isTypeLegal(VT) &&
395  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
396  }
397 
398  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
399  return true;
400  }
401 
402  unsigned getFPOpCost(Type *Ty) {
403  // Check whether FADD is available, as a proxy for floating-point in
404  // general.
405  const TargetLoweringBase *TLI = getTLI();
406  EVT VT = TLI->getValueType(DL, Ty);
407  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
408  return TargetTransformInfo::TCC_Basic;
409  return TargetTransformInfo::TCC_Expensive;
410  }
411 
412  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
413  const TargetLoweringBase *TLI = getTLI();
414  switch (Opcode) {
415  default: break;
416  case Instruction::Trunc:
417  if (TLI->isTruncateFree(OpTy, Ty))
418  return TargetTransformInfo::TCC_Free;
419  return TargetTransformInfo::TCC_Basic;
420  case Instruction::ZExt:
421  if (TLI->isZExtFree(OpTy, Ty))
422  return TargetTransformInfo::TCC_Free;
423  return TargetTransformInfo::TCC_Basic;
424 
425  case Instruction::AddrSpaceCast:
426  if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
427  Ty->getPointerAddressSpace()))
428  return TargetTransformInfo::TCC_Free;
429  return TargetTransformInfo::TCC_Basic;
430  }
431 
432  return BaseT::getOperationCost(Opcode, Ty, OpTy);
433  }
434 
435  unsigned getInliningThresholdMultiplier() { return 1; }
436 
437  int getInlinerVectorBonusPercent() { return 150; }
438 
439  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
440  TTI::UnrollingPreferences &UP) {
441  // This unrolling functionality is target independent, but to provide some
442  // motivation for its intended use, for x86:
443 
444  // According to the Intel 64 and IA-32 Architectures Optimization Reference
445  // Manual, Intel Core models and later have a loop stream detector (and
446  // associated uop queue) that can benefit from partial unrolling.
447  // The relevant requirements are:
448  // - The loop must have no more than 4 (8 for Nehalem and later) branches
449  // taken, and none of them may be calls.
450  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
451 
452  // According to the Software Optimization Guide for AMD Family 15h
453  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
454  // and loop buffer which can benefit from partial unrolling.
455  // The relevant requirements are:
456  // - The loop must have fewer than 16 branches
457  // - The loop must have less than 40 uops in all executed loop branches
458 
459  // The number of taken branches in a loop is hard to estimate here, and
460  // benchmarking has revealed that it is better not to be conservative when
461  // estimating the branch count. As a result, we'll ignore the branch limits
462  // until someone finds a case where it matters in practice.
463 
464  unsigned MaxOps;
465  const TargetSubtargetInfo *ST = getST();
466  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
467  MaxOps = PartialUnrollingThreshold;
468  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
469  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
470  else
471  return;
472 
473  // Scan the loop: don't unroll loops with calls.
474  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
475  ++I) {
476  BasicBlock *BB = *I;
477 
478  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
479  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
480  ImmutableCallSite CS(&*J);
481  if (const Function *F = CS.getCalledFunction()) {
482  if (!static_cast<T *>(this)->isLoweredToCall(F))
483  continue;
484  }
485 
486  return;
487  }
488  }
489 
490  // Enable runtime and partial unrolling up to the specified size.
491  // Enable using trip count upper bound to unroll loops.
492  UP.Partial = UP.Runtime = UP.UpperBound = true;
493  UP.PartialThreshold = MaxOps;
494 
495  // Avoid unrolling when optimizing for size.
496  UP.OptSizeThreshold = 0;
497  UP.PartialOptSizeThreshold = 0;
498 
499  // Set number of instructions optimized when "back edge"
500  // becomes "fall through" to default value of 2.
501  UP.BEInsns = 2;
502  }
503 
504  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
505  AssumptionCache &AC,
506  TargetLibraryInfo *LibInfo,
507  HardwareLoopInfo &HWLoopInfo) {
508  return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
509  }
510 
511  unsigned getInstructionLatency(const Instruction *I) {
512  if (isa<LoadInst>(I))
513  return getST()->getSchedModel().DefaultLoadLatency;
514 
515  return BaseT::getInstructionLatency(I);
516  }
517 
518  virtual Optional<unsigned>
519  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
520  return Optional<unsigned>(
521  getST()->getCacheSize(static_cast<unsigned>(Level)));
522  }
523 
524  virtual Optional<unsigned>
525  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
526  Optional<unsigned> TargetResult =
527  getST()->getCacheAssociativity(static_cast<unsigned>(Level));
528 
529  if (TargetResult)
530  return TargetResult;
531 
532  return BaseT::getCacheAssociativity(Level);
533  }
534 
535  virtual unsigned getCacheLineSize() const {
536  return getST()->getCacheLineSize();
537  }
538 
539  virtual unsigned getPrefetchDistance() const {
540  return getST()->getPrefetchDistance();
541  }
542 
543  virtual unsigned getMinPrefetchStride() const {
544  return getST()->getMinPrefetchStride();
545  }
546 
547  virtual unsigned getMaxPrefetchIterationsAhead() const {
548  return getST()->getMaxPrefetchIterationsAhead();
549  }
550 
551  /// @}
552 
553  /// \name Vector TTI Implementations
554  /// @{
555 
556  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
557 
558  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
559  /// are set if the result needs to be inserted and/or extracted from vectors.
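 /// For example, for a <4 x i32> type with both Insert and Extract set, this
 /// would sum the costs of four insertelements and four extractelements.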
560  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
561  assert(Ty->isVectorTy() && "Can only scalarize vectors");
562  unsigned Cost = 0;
563 
564  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
565  if (Insert)
566  Cost += static_cast<T *>(this)
567  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
568  if (Extract)
569  Cost += static_cast<T *>(this)
570  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
571  }
572 
573  return Cost;
574  }
575 
576  /// Estimate the overhead of scalarizing an instruction's unique
577  /// non-constant operands. The types of the arguments are ordinarily
578  /// scalar, in which case the costs are multiplied with VF.
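 /// For example, with VF == 4, each unique non-constant scalar argument is
 /// costed as extracting the four lanes of an assumed <4 x Ty> operand
 /// vector; constant and repeated operands add nothing.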
579  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
580  unsigned VF) {
581  unsigned Cost = 0;
582  SmallPtrSet<const Value*, 4> UniqueOperands;
583  for (const Value *A : Args) {
584  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
585  Type *VecTy = nullptr;
586  if (A->getType()->isVectorTy()) {
587  VecTy = A->getType();
588  // If A is a vector operand, VF should be 1 or correspond to A.
589  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
590  "Vector argument does not match VF");
591  }
592  else
593  VecTy = VectorType::get(A->getType(), VF);
594 
595  Cost += getScalarizationOverhead(VecTy, false, true);
596  }
597  }
598 
599  return Cost;
600  }
601 
602  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
603  assert(VecTy->isVectorTy());
604 
605  unsigned Cost = 0;
606 
607  Cost += getScalarizationOverhead(VecTy, true, false);
608  if (!Args.empty())
609  Cost += getOperandsScalarizationOverhead(Args,
610  VecTy->getVectorNumElements());
611  else
612  // When no information on arguments is provided, we add the cost
613  // associated with one argument as a heuristic.
614  Cost += getScalarizationOverhead(VecTy, false, true);
615 
616  return Cost;
617  }
618 
619  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
620 
621  unsigned getArithmeticInstrCost(
622  unsigned Opcode, Type *Ty,
623  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
624  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
625  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
626  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
627  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
628  // Check if any of the operands are vector operands.
629  const TargetLoweringBase *TLI = getTLI();
630  int ISD = TLI->InstructionOpcodeToISD(Opcode);
631  assert(ISD && "Invalid opcode");
632 
633  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
634 
635  bool IsFloat = Ty->isFPOrFPVectorTy();
636  // Assume that floating point arithmetic operations cost twice as much as
637  // integer operations.
638  unsigned OpCost = (IsFloat ? 2 : 1);
639 
640  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
641  // The operation is legal. Assume it costs 1.
642  // TODO: Once we have extract/insert subvector cost we need to use them.
643  return LT.first * OpCost;
644  }
645 
646  if (!TLI->isOperationExpand(ISD, LT.second)) {
647  // If the operation is custom lowered, then assume that the code is twice
648  // as expensive.
649  return LT.first * 2 * OpCost;
650  }
651 
652  // Else, assume that we need to scalarize this op.
653  // TODO: If one of the types get legalized by splitting, handle this
654  // similarly to what getCastInstrCost() does.
655  if (Ty->isVectorTy()) {
656  unsigned Num = Ty->getVectorNumElements();
657  unsigned Cost = static_cast<T *>(this)
658  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
659  // Return the cost of multiple scalar invocations plus the cost of
660  // inserting and extracting the values.
661  return getScalarizationOverhead(Ty, Args) + Num * Cost;
662  }
663 
664  // We don't know anything about this scalar instruction.
665  return OpCost;
666  }
667 
668  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
669  Type *SubTp) {
670  switch (Kind) {
671  case TTI::SK_Broadcast:
672  return getBroadcastShuffleOverhead(Tp);
673  case TTI::SK_Select:
674  case TTI::SK_Reverse:
675  case TTI::SK_Transpose:
676  case TTI::SK_PermuteSingleSrc:
677  case TTI::SK_PermuteTwoSrc:
678  return getPermuteShuffleOverhead(Tp);
679  case TTI::SK_ExtractSubvector:
680  return getExtractSubvectorOverhead(Tp, Index, SubTp);
681  case TTI::SK_InsertSubvector:
682  return getInsertSubvectorOverhead(Tp, Index, SubTp);
683  }
684  llvm_unreachable("Unknown TTI::ShuffleKind");
685  }
686 
687  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
688  const Instruction *I = nullptr) {
689  const TargetLoweringBase *TLI = getTLI();
690  int ISD = TLI->InstructionOpcodeToISD(Opcode);
691  assert(ISD && "Invalid opcode");
692  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
693  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
694 
695  // Check for NOOP conversions.
696  if (SrcLT.first == DstLT.first &&
697  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
698 
699  // Bitcast between types that are legalized to the same type are free.
700  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
701  return 0;
702  }
703 
704  if (Opcode == Instruction::Trunc &&
705  TLI->isTruncateFree(SrcLT.second, DstLT.second))
706  return 0;
707 
708  if (Opcode == Instruction::ZExt &&
709  TLI->isZExtFree(SrcLT.second, DstLT.second))
710  return 0;
711 
712  if (Opcode == Instruction::AddrSpaceCast &&
713  TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
714  Dst->getPointerAddressSpace()))
715  return 0;
716 
717  // If this is a zext/sext of a load, return 0 if the corresponding
718  // extending load exists on target.
719  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
720  I && isa<LoadInst>(I->getOperand(0))) {
721  EVT ExtVT = EVT::getEVT(Dst);
722  EVT LoadVT = EVT::getEVT(Src);
723  unsigned LType =
724  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
725  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
726  return 0;
727  }
728 
729  // If the cast is marked as legal (or promote) then assume low cost.
730  if (SrcLT.first == DstLT.first &&
731  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
732  return 1;
733 
734  // Handle scalar conversions.
735  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
736  // Scalar bitcasts are usually free.
737  if (Opcode == Instruction::BitCast)
738  return 0;
739 
740  // Just check the op cost. If the operation is legal then assume it costs
741  // 1.
742  if (!TLI->isOperationExpand(ISD, DstLT.second))
743  return 1;
744 
745  // Assume that illegal scalar instruction are expensive.
746  return 4;
747  }
748 
749  // Check vector-to-vector casts.
750  if (Dst->isVectorTy() && Src->isVectorTy()) {
751  // If the cast is between same-sized registers, then the check is simple.
752  if (SrcLT.first == DstLT.first &&
753  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
754 
755  // Assume that Zext is done using AND.
756  if (Opcode == Instruction::ZExt)
757  return 1;
758 
759  // Assume that sext is done using SHL and SRA.
760  if (Opcode == Instruction::SExt)
761  return 2;
762 
763  // Just check the op cost. If the operation is legal then assume it
764  // costs
765  // 1 and multiply by the type-legalization overhead.
766  if (!TLI->isOperationExpand(ISD, DstLT.second))
767  return SrcLT.first * 1;
768  }
769 
770  // If we are legalizing by splitting, query the concrete TTI for the cost
771  // of casting the original vector twice. We also need to factor in the
772  // cost of the split itself. Count that as 1, to be consistent with
773  // TLI->getTypeLegalizationCost().
774  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
775  TargetLowering::TypeSplitVector) ||
776  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
777  TargetLowering::TypeSplitVector)) {
778  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
779  Dst->getVectorNumElements() / 2);
780  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
781  Src->getVectorNumElements() / 2);
782  T *TTI = static_cast<T *>(this);
783  return TTI->getVectorSplitCost() +
784  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
785  }
786 
787  // In other cases where the source or destination are illegal, assume
788  // the operation will get scalarized.
789  unsigned Num = Dst->getVectorNumElements();
790  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
791  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
792 
793  // Return the cost of multiple scalar invocations plus the cost of
794  // inserting and extracting the values.
795  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
796  }
797 
798  // We already handled vector-to-vector and scalar-to-scalar conversions.
799  // This
800  // is where we handle bitcast between vectors and scalars. We need to assume
801  // that the conversion is scalarized in one way or another.
802  if (Opcode == Instruction::BitCast)
803  // Illegal bitcasts are done by storing and loading from a stack slot.
804  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
805  : 0) +
806  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
807  : 0);
808 
809  llvm_unreachable("Unhandled cast");
810  }
811 
812  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
813  VectorType *VecTy, unsigned Index) {
814  return static_cast<T *>(this)->getVectorInstrCost(
815  Instruction::ExtractElement, VecTy, Index) +
816  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
817  VecTy->getElementType());
818  }
819 
820  unsigned getCFInstrCost(unsigned Opcode) {
821  // Branches are assumed to be predicted.
822  return 0;
823  }
824 
825  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
826  const Instruction *I) {
827  const TargetLoweringBase *TLI = getTLI();
828  int ISD = TLI->InstructionOpcodeToISD(Opcode);
829  assert(ISD && "Invalid opcode");
830 
831  // Selects on vectors are actually vector selects.
832  if (ISD == ISD::SELECT) {
833  assert(CondTy && "CondTy must exist");
834  if (CondTy->isVectorTy())
835  ISD = ISD::VSELECT;
836  }
837  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
838 
839  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
840  !TLI->isOperationExpand(ISD, LT.second)) {
841  // The operation is legal. Assume it costs 1. Multiply
842  // by the type-legalization overhead.
843  return LT.first * 1;
844  }
845 
846  // Otherwise, assume that the cast is scalarized.
847  // TODO: If one of the types get legalized by splitting, handle this
848  // similarly to what getCastInstrCost() does.
849  if (ValTy->isVectorTy()) {
850  unsigned Num = ValTy->getVectorNumElements();
851  if (CondTy)
852  CondTy = CondTy->getScalarType();
853  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
854  Opcode, ValTy->getScalarType(), CondTy, I);
855 
856  // Return the cost of multiple scalar invocations plus the cost of
857  // inserting and extracting the values.
858  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
859  }
860 
861  // Unknown scalar opcode.
862  return 1;
863  }
864 
865  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
866  std::pair<unsigned, MVT> LT =
867  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
868 
869  return LT.first;
870  }
871 
872  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
873  unsigned AddressSpace, const Instruction *I = nullptr) {
874  assert(!Src->isVoidTy() && "Invalid type");
875  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
876 
877  // Assuming that all loads of legal types cost 1.
878  unsigned Cost = LT.first;
879 
880  if (Src->isVectorTy() &&
881  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
882  // This is a vector load that legalizes to a larger type than the vector
883  // itself. Unless the corresponding extending load or truncating store is
884  // legal, then this will scalarize.
885  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
886  EVT MemVT = getTLI()->getValueType(DL, Src);
887  if (Opcode == Instruction::Store)
888  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
889  else
890  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
891 
892  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
893  // This is a vector load/store for some illegal type that is scalarized.
894  // We must account for the cost of building or decomposing the vector.
895  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
896  Opcode == Instruction::Store);
897  }
898  }
899 
900  return Cost;
901  }
902 
903  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
904  unsigned Factor,
905  ArrayRef<unsigned> Indices,
906  unsigned Alignment, unsigned AddressSpace,
907  bool UseMaskForCond = false,
908  bool UseMaskForGaps = false) {
909  VectorType *VT = dyn_cast<VectorType>(VecTy);
910  assert(VT && "Expect a vector type for interleaved memory op");
911 
912  unsigned NumElts = VT->getNumElements();
913  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
914 
915  unsigned NumSubElts = NumElts / Factor;
916  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
917 
918  // Firstly, the cost of load/store operation.
919  unsigned Cost;
920  if (UseMaskForCond || UseMaskForGaps)
921  Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
922  Opcode, VecTy, Alignment, AddressSpace);
923  else
924  Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
925  AddressSpace);
926 
927  // Legalize the vector type, and get the legalized and unlegalized type
928  // sizes.
929  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
930  unsigned VecTySize =
931  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
932  unsigned VecTyLTSize = VecTyLT.getStoreSize();
933 
934  // Return the ceiling of dividing A by B.
935  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
936 
937  // Scale the cost of the memory operation by the fraction of legalized
938  // instructions that will actually be used. We shouldn't account for the
939  // cost of dead instructions since they will be removed.
940  //
941  // E.g., An interleaved load of factor 8:
942  // %vec = load <16 x i64>, <16 x i64>* %ptr
943  // %v0 = shufflevector %vec, undef, <0, 8>
944  //
945  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
946  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
947  // type). The other loads are unused.
948  //
949  // We only scale the cost of loads since interleaved store groups aren't
950  // allowed to have gaps.
951  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
952  // The number of loads of a legal type it will take to represent a load
953  // of the unlegalized vector type.
954  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
955 
956  // The number of elements of the unlegalized type that correspond to a
957  // single legal instruction.
958  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
959 
960  // Determine which legal instructions will be used.
961  BitVector UsedInsts(NumLegalInsts, false);
962  for (unsigned Index : Indices)
963  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
964  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
965 
966  // Scale the cost of the load by the fraction of legal instructions that
967  // will be used.
968  Cost *= UsedInsts.count() / NumLegalInsts;
969  }
970 
971  // Then add the cost of the interleave operation.
972  if (Opcode == Instruction::Load) {
973  // The interleave cost is similar to extracting the sub vectors' elements
974  // from the wide vector and inserting them into the sub vectors.
975  //
976  // E.g. An interleaved load of factor 2 (with one member of index 0):
977  // %vec = load <8 x i32>, <8 x i32>* %ptr
978  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
979  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
980  // <8 x i32> vector and insert them into a <4 x i32> vector.
981 
982  assert(Indices.size() <= Factor &&
983  "Interleaved memory op has too many members");
984 
985  for (unsigned Index : Indices) {
986  assert(Index < Factor && "Invalid index for interleaved memory op");
987 
988  // Extract elements from loaded vector for each sub vector.
989  for (unsigned i = 0; i < NumSubElts; i++)
990  Cost += static_cast<T *>(this)->getVectorInstrCost(
991  Instruction::ExtractElement, VT, Index + i * Factor);
992  }
993 
994  unsigned InsSubCost = 0;
995  for (unsigned i = 0; i < NumSubElts; i++)
996  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
997  Instruction::InsertElement, SubVT, i);
998 
999  Cost += Indices.size() * InsSubCost;
1000  } else {
1001  // The interleave cost is extracting all elements from the sub vectors and
1002  // inserting them into the wide vector.
1003  //
1004  // E.g. An interleaved store of factor 2:
1005  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1006  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1007  // The cost is estimated as extract all elements from both <4 x i32>
1008  // vectors and insert into the <8 x i32> vector.
1009 
1010  unsigned ExtSubCost = 0;
1011  for (unsigned i = 0; i < NumSubElts; i++)
1012  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1013  Instruction::ExtractElement, SubVT, i);
1014  Cost += ExtSubCost * Factor;
1015 
1016  for (unsigned i = 0; i < NumElts; i++)
1017  Cost += static_cast<T *>(this)
1018  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1019  }
1020 
1021  if (!UseMaskForCond)
1022  return Cost;
1023 
1024  Type *I8Type = Type::getInt8Ty(VT->getContext());
1025  VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1026  SubVT = VectorType::get(I8Type, NumSubElts);
1027 
1028  // The mask shuffling cost is extracting all the elements of the mask
1029  // and inserting each of them Factor times into the wide vector:
1030  //
1031  // E.g. an interleaved group with factor 3:
1032  // %mask = icmp ult <8 x i32> %vec1, %vec2
1033  // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1034  // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1035  // The cost is estimated as extract all mask elements from the <8xi1> mask
1036  // vector and insert them factor times into the <24xi1> shuffled mask
1037  // vector.
1038  for (unsigned i = 0; i < NumSubElts; i++)
1039  Cost += static_cast<T *>(this)->getVectorInstrCost(
1040  Instruction::ExtractElement, SubVT, i);
1041 
1042  for (unsigned i = 0; i < NumElts; i++)
1043  Cost += static_cast<T *>(this)->getVectorInstrCost(
1044  Instruction::InsertElement, MaskVT, i);
1045 
1046  // The Gaps mask is invariant and created outside the loop, therefore the
1047  // cost of creating it is not accounted for here. However if we have both
1048  // a MaskForGaps and some other mask that guards the execution of the
1049  // memory access, we need to account for the cost of And-ing the two masks
1050  // inside the loop.
1051  if (UseMaskForGaps)
1052  Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1053  BinaryOperator::And, MaskVT);
1054 
1055  return Cost;
1056  }
1057 
1058  /// Get intrinsic cost based on arguments.
1059  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1060  ArrayRef<Value *> Args, FastMathFlags FMF,
1061  unsigned VF = 1) {
1062  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1063  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1064  auto *ConcreteTTI = static_cast<T *>(this);
1065 
1066  switch (IID) {
1067  default: {
1068  // Assume that we need to scalarize this intrinsic.
1069  SmallVector<Type *, 4> Types;
1070  for (Value *Op : Args) {
1071  Type *OpTy = Op->getType();
1072  assert(VF == 1 || !OpTy->isVectorTy());
1073  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1074  }
1075 
1076  if (VF > 1 && !RetTy->isVoidTy())
1077  RetTy = VectorType::get(RetTy, VF);
1078 
1079  // Compute the scalarization overhead based on Args for a vector
1080  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1081  // CostModel will pass a vector RetTy and VF is 1.
1082  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1083  if (RetVF > 1 || VF > 1) {
1084  ScalarizationCost = 0;
1085  if (!RetTy->isVoidTy())
1086  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1087  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1088  }
1089 
1090  return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1091  ScalarizationCost);
1092  }
1093  case Intrinsic::masked_scatter: {
1094  assert(VF == 1 && "Can't vectorize types here.");
1095  Value *Mask = Args[3];
1096  bool VarMask = !isa<Constant>(Mask);
1097  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1098  return ConcreteTTI->getGatherScatterOpCost(
1099  Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1100  }
1101  case Intrinsic::masked_gather: {
1102  assert(VF == 1 && "Can't vectorize types here.");
1103  Value *Mask = Args[2];
1104  bool VarMask = !isa<Constant>(Mask);
1105  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1106  return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1107  Args[0], VarMask, Alignment);
1108  }
1109  case Intrinsic::experimental_vector_reduce_add:
1110  case Intrinsic::experimental_vector_reduce_mul:
1111  case Intrinsic::experimental_vector_reduce_and:
1112  case Intrinsic::experimental_vector_reduce_or:
1113  case Intrinsic::experimental_vector_reduce_xor:
1114  case Intrinsic::experimental_vector_reduce_v2_fadd:
1115  case Intrinsic::experimental_vector_reduce_v2_fmul:
1116  case Intrinsic::experimental_vector_reduce_smax:
1117  case Intrinsic::experimental_vector_reduce_smin:
1118  case Intrinsic::experimental_vector_reduce_fmax:
1119  case Intrinsic::experimental_vector_reduce_fmin:
1120  case Intrinsic::experimental_vector_reduce_umax:
1121  case Intrinsic::experimental_vector_reduce_umin:
1122  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1123  case Intrinsic::fshl:
1124  case Intrinsic::fshr: {
1125  Value *X = Args[0];
1126  Value *Y = Args[1];
1127  Value *Z = Args[2];
1128  TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1129  TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1130  TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1131  TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1132  TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1133  OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1134  : TTI::OP_None;
1135  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1136  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1137  unsigned Cost = 0;
1138  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1139  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1140  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1141  OpKindX, OpKindZ, OpPropsX);
1142  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1143  OpKindY, OpKindZ, OpPropsY);
1144  // Non-constant shift amounts require a modulo.
1145  if (OpKindZ != TTI::OK_UniformConstantValue &&
1146  OpKindZ != TTI::OK_NonUniformConstantValue)
1147  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1148  OpKindZ, OpKindBW, OpPropsZ,
1149  OpPropsBW);
1150  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1151  if (X != Y) {
1152  Type *CondTy = RetTy->getWithNewBitWidth(1);
1153  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1154  CondTy, nullptr);
1155  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1156  CondTy, nullptr);
1157  }
1158  return Cost;
1159  }
1160  }
1161  }
1162 
1163  /// Get intrinsic cost based on argument types.
1164  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1165  /// cost of scalarizing the arguments and the return value will be computed
1166  /// based on types.
1167  unsigned getIntrinsicInstrCost(
1168  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1169  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1170  auto *ConcreteTTI = static_cast<T *>(this);
1171 
1172  SmallVector<unsigned, 2> ISDs;
1173  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1174  switch (IID) {
1175  default: {
1176  // Assume that we need to scalarize this intrinsic.
1177  unsigned ScalarizationCost = ScalarizationCostPassed;
1178  unsigned ScalarCalls = 1;
1179  Type *ScalarRetTy = RetTy;
1180  if (RetTy->isVectorTy()) {
1181  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1182  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1183  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1184  ScalarRetTy = RetTy->getScalarType();
1185  }
1186  SmallVector<Type *, 4> ScalarTys;
1187  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1188  Type *Ty = Tys[i];
1189  if (Ty->isVectorTy()) {
1190  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1191  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1192  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1193  Ty = Ty->getScalarType();
1194  }
1195  ScalarTys.push_back(Ty);
1196  }
1197  if (ScalarCalls == 1)
1198  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1199 
1200  unsigned ScalarCost =
1201  ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1202 
1203  return ScalarCalls * ScalarCost + ScalarizationCost;
1204  }
1205  // Look for intrinsics that can be lowered directly or turned into a scalar
1206  // intrinsic call.
1207  case Intrinsic::sqrt:
1208  ISDs.push_back(ISD::FSQRT);
1209  break;
1210  case Intrinsic::sin:
1211  ISDs.push_back(ISD::FSIN);
1212  break;
1213  case Intrinsic::cos:
1214  ISDs.push_back(ISD::FCOS);
1215  break;
1216  case Intrinsic::exp:
1217  ISDs.push_back(ISD::FEXP);
1218  break;
1219  case Intrinsic::exp2:
1220  ISDs.push_back(ISD::FEXP2);
1221  break;
1222  case Intrinsic::log:
1223  ISDs.push_back(ISD::FLOG);
1224  break;
1225  case Intrinsic::log10:
1226  ISDs.push_back(ISD::FLOG10);
1227  break;
1228  case Intrinsic::log2:
1229  ISDs.push_back(ISD::FLOG2);
1230  break;
1231  case Intrinsic::fabs:
1232  ISDs.push_back(ISD::FABS);
1233  break;
1234  case Intrinsic::canonicalize:
1235  ISDs.push_back(ISD::FCANONICALIZE);
1236  break;
1237  case Intrinsic::minnum:
1238  ISDs.push_back(ISD::FMINNUM);
1239  if (FMF.noNaNs())
1240  ISDs.push_back(ISD::FMINIMUM);
1241  break;
1242  case Intrinsic::maxnum:
1243  ISDs.push_back(ISD::FMAXNUM);
1244  if (FMF.noNaNs())
1245  ISDs.push_back(ISD::FMAXIMUM);
1246  break;
1247  case Intrinsic::copysign:
1248  ISDs.push_back(ISD::FCOPYSIGN);
1249  break;
1250  case Intrinsic::floor:
1251  ISDs.push_back(ISD::FFLOOR);
1252  break;
1253  case Intrinsic::ceil:
1254  ISDs.push_back(ISD::FCEIL);
1255  break;
1256  case Intrinsic::trunc:
1257  ISDs.push_back(ISD::FTRUNC);
1258  break;
1259  case Intrinsic::nearbyint:
1260  ISDs.push_back(ISD::FNEARBYINT);
1261  break;
1262  case Intrinsic::rint:
1263  ISDs.push_back(ISD::FRINT);
1264  break;
1265  case Intrinsic::round:
1266  ISDs.push_back(ISD::FROUND);
1267  break;
1268  case Intrinsic::pow:
1269  ISDs.push_back(ISD::FPOW);
1270  break;
1271  case Intrinsic::fma:
1272  ISDs.push_back(ISD::FMA);
1273  break;
1274  case Intrinsic::fmuladd:
1275  ISDs.push_back(ISD::FMA);
1276  break;
1277  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1278  case Intrinsic::lifetime_start:
1279  case Intrinsic::lifetime_end:
1280  case Intrinsic::sideeffect:
1281  return 0;
1282  case Intrinsic::masked_store:
1283  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1284  0);
1285  case Intrinsic::masked_load:
1286  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1287  case Intrinsic::experimental_vector_reduce_add:
1288  return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1289  /*IsPairwiseForm=*/false);
1290  case Intrinsic::experimental_vector_reduce_mul:
1291  return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1292  /*IsPairwiseForm=*/false);
1293  case Intrinsic::experimental_vector_reduce_and:
1294  return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1295  /*IsPairwiseForm=*/false);
1296  case Intrinsic::experimental_vector_reduce_or:
1297  return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1298  /*IsPairwiseForm=*/false);
1299  case Intrinsic::experimental_vector_reduce_xor:
1300  return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1301  /*IsPairwiseForm=*/false);
1302  case Intrinsic::experimental_vector_reduce_v2_fadd:
1303  return ConcreteTTI->getArithmeticReductionCost(
1304  Instruction::FAdd, Tys[0],
1305  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1306  // reductions.
1307  case Intrinsic::experimental_vector_reduce_v2_fmul:
1308  return ConcreteTTI->getArithmeticReductionCost(
1309  Instruction::FMul, Tys[0],
1310  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1311  // reductions.
1312  case Intrinsic::experimental_vector_reduce_smax:
1313  case Intrinsic::experimental_vector_reduce_smin:
1314  case Intrinsic::experimental_vector_reduce_fmax:
1315  case Intrinsic::experimental_vector_reduce_fmin:
1316  return ConcreteTTI->getMinMaxReductionCost(
1317  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1318  /*IsUnsigned=*/true);
1319  case Intrinsic::experimental_vector_reduce_umax:
1320  case Intrinsic::experimental_vector_reduce_umin:
1321  return ConcreteTTI->getMinMaxReductionCost(
1322  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1323  /*IsUnsigned=*/false);
1324  case Intrinsic::sadd_sat:
1325  case Intrinsic::ssub_sat: {
1326  Type *CondTy = RetTy->getWithNewBitWidth(1);
1327 
1328  Type *OpTy = StructType::create({RetTy, CondTy});
1329  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1330  ? Intrinsic::sadd_with_overflow
1331  : Intrinsic::ssub_with_overflow;
1332 
1333  // SatMax -> Overflow && SumDiff < 0
1334  // SatMin -> Overflow && SumDiff >= 0
1335  unsigned Cost = 0;
1336  Cost += ConcreteTTI->getIntrinsicInstrCost(
1337  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1338  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1339  CondTy, nullptr);
1340  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1341  CondTy, nullptr);
1342  return Cost;
1343  }
1344  case Intrinsic::uadd_sat:
1345  case Intrinsic::usub_sat: {
1346  Type *CondTy = RetTy->getWithNewBitWidth(1);
1347 
1348  Type *OpTy = StructType::create({RetTy, CondTy});
1349  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1350  ? Intrinsic::uadd_with_overflow
1351  : Intrinsic::usub_with_overflow;
1352 
1353  unsigned Cost = 0;
1354  Cost += ConcreteTTI->getIntrinsicInstrCost(
1355  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1356  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1357  CondTy, nullptr);
1358  return Cost;
1359  }
1360  case Intrinsic::smul_fix:
1361  case Intrinsic::umul_fix: {
1362  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1363  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1364 
1365  unsigned ExtOp =
1366  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1367 
1368  unsigned Cost = 0;
1369  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1370  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1371  Cost +=
1372  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1373  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1374  TTI::OK_AnyValue,
1375  TTI::OK_UniformConstantValue);
1376  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1377  TTI::OK_AnyValue,
1378  TTI::OK_UniformConstantValue);
1379  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1380  return Cost;
1381  }
1382  case Intrinsic::sadd_with_overflow:
1383  case Intrinsic::ssub_with_overflow: {
1384  Type *SumTy = RetTy->getContainedType(0);
1385  Type *OverflowTy = RetTy->getContainedType(1);
1386  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1387  ? BinaryOperator::Add
1388  : BinaryOperator::Sub;
1389 
1390  // LHSSign -> LHS >= 0
1391  // RHSSign -> RHS >= 0
1392  // SumSign -> Sum >= 0
1393  //
1394  // Add:
1395  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1396  // Sub:
1397  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1398  unsigned Cost = 0;
1399  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1400  Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1401  OverflowTy, nullptr);
1402  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1403  BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1404  Cost +=
1405  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1406  return Cost;
1407  }
1408  case Intrinsic::uadd_with_overflow:
1409  case Intrinsic::usub_with_overflow: {
1410  Type *SumTy = RetTy->getContainedType(0);
1411  Type *OverflowTy = RetTy->getContainedType(1);
1412  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1413  ? BinaryOperator::Add
1414  : BinaryOperator::Sub;
1415 
1416  unsigned Cost = 0;
1417  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1418  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1419  OverflowTy, nullptr);
1420  return Cost;
1421  }
1422  case Intrinsic::smul_with_overflow:
1423  case Intrinsic::umul_with_overflow: {
1424  Type *MulTy = RetTy->getContainedType(0);
1425  Type *OverflowTy = RetTy->getContainedType(1);
1426  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1427  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1428 
1429  unsigned ExtOp =
1430  IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
1431 
1432  unsigned Cost = 0;
1433  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1434  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1435  Cost +=
1436  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1437  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1438  TTI::OK_AnyValue,
1439  TTI::OK_UniformConstantValue);
1440 
1441  if (IID == Intrinsic::smul_with_overflow)
1442  Cost += ConcreteTTI->getArithmeticInstrCost(
1443  Instruction::AShr, MulTy, TTI::OK_AnyValue,
1444  TTI::OK_UniformConstantValue);
1445 
1446  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1447  OverflowTy, nullptr);
1448  return Cost;
1449  }
1450  case Intrinsic::ctpop:
1451  ISDs.push_back(ISD::CTPOP);
1452  // In case of legalization use TCC_Expensive. This is cheaper than a
1453  // library call but still not a cheap instruction.
1454  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1455  break;
1456  // FIXME: ctlz, cttz, ...
1457  }
1458 
1459  const TargetLoweringBase *TLI = getTLI();
1460  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1461 
1462  SmallVector<unsigned, 2> LegalCost;
1463  SmallVector<unsigned, 2> CustomCost;
1464  for (unsigned ISD : ISDs) {
1465  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1466  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1467  TLI->isFAbsFree(LT.second)) {
1468  return 0;
1469  }
1470 
1471  // The operation is legal. Assume it costs 1.
1472  // If the type is split to multiple registers, assume that there is some
1473  // overhead to this.
1474  // TODO: Once we have extract/insert subvector cost we need to use them.
1475  if (LT.first > 1)
1476  LegalCost.push_back(LT.first * 2);
1477  else
1478  LegalCost.push_back(LT.first * 1);
1479  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1480  // If the operation is custom lowered then assume
1481  // that the code is twice as expensive.
1482  CustomCost.push_back(LT.first * 2);
1483  }
1484  }
1485 
1486  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1487  if (MinLegalCostI != LegalCost.end())
1488  return *MinLegalCostI;
1489 
1490  auto MinCustomCostI =
1491  std::min_element(CustomCost.begin(), CustomCost.end());
1492  if (MinCustomCostI != CustomCost.end())
1493  return *MinCustomCostI;
1494 
1495  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1496  // point mul followed by an add.
1497  if (IID == Intrinsic::fmuladd)
1498  return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1499  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1500 
1501  // Else, assume that we need to scalarize this intrinsic. For math builtins
1502  // this will emit a costly libcall, adding call overhead and spills. Make it
1503  // very expensive.
1504  if (RetTy->isVectorTy()) {
1505  unsigned ScalarizationCost =
1506  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1507  ? ScalarizationCostPassed
1508  : getScalarizationOverhead(RetTy, true, false));
1509  unsigned ScalarCalls = RetTy->getVectorNumElements();
1510  SmallVector<Type *, 4> ScalarTys;
1511  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1512  Type *Ty = Tys[i];
1513  if (Ty->isVectorTy())
1514  Ty = Ty->getScalarType();
1515  ScalarTys.push_back(Ty);
1516  }
1517  unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1518  IID, RetTy->getScalarType(), ScalarTys, FMF);
1519  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1520  if (Tys[i]->isVectorTy()) {
1521  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1522  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1523  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1524  }
1525  }
1526 
1527  return ScalarCalls * ScalarCost + ScalarizationCost;
1528  }
1529 
1530  // This is going to be turned into a library call, make it expensive.
1531  return SingleCallCost;
1532  }
1533 
1534  /// Compute a cost of the given call instruction.
1535  ///
1536  /// Compute the cost of calling function F with return type RetTy and
1537  /// argument types Tys. F might be nullptr, in this case the cost of an
1538  /// arbitrary call with the specified signature will be returned.
1539  /// This is used, for instance, when we estimate call of a vector
1540  /// counterpart of the given function.
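 /// For example, a vectorizer costing a hypothetical vector math routine
 /// <4 x float> @vsinf(<4 x float>) could pass F == nullptr together with
 /// RetTy == <4 x float> and Tys == { <4 x float> }.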
1541  /// \param F Called function, might be nullptr.
1542  /// \param RetTy Return value types.
1543  /// \param Tys Argument types.
1544  /// \returns The cost of Call instruction.
1545  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1546  return 10;
1547  }
1548 
1549  unsigned getNumberOfParts(Type *Tp) {
1550  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1551  return LT.first;
1552  }
1553 
1554  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1555  const SCEV *) {
1556  return 0;
1557  }
1558 
1559  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1560  /// We're assuming that reduction operations are performed in the following way:
1561  /// 1. Non-pairwise reduction
1562  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1563  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1564  /// \----------------v-------------/ \----------v------------/
1565  /// n/2 elements n/2 elements
1566  /// %red1 = op <n x t> %val, <n x t> val1
1567  /// After this operation we have a vector %red1 where only the first n/2
1568  /// elements are meaningful, the second n/2 elements are undefined and can be
1569  /// dropped. All other operations are actually working with the vector of
1570  /// length n/2, not n, though the real vector length is still n.
1571  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1572  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1573  /// \----------------v-------------/ \----------v------------/
1574  /// n/4 elements 3*n/4 elements
1575  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1576  /// length n/2, the resulting vector has length n/4 etc.
1577  /// 2. Pairwise reduction:
1578  /// Everything is the same except for an additional shuffle operation which
1579  /// is used to produce operands for pairwise reductions.
1580  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1581  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1582  /// \-------------v----------/ \----------v------------/
1583  /// n/2 elements n/2 elements
1584  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1585  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1586  /// \-------------v----------/ \----------v------------/
1587  /// n/2 elements n/2 elements
1588  /// %red1 = op <n x t> %val1, <n x t> %val2
1589  /// Again, the operation is performed on an <n x t> vector, but the resulting
1590  /// vector %red1 is an <n/2 x t> vector.
1591  ///
1592  /// The cost model should take into account that the actual length of the
1593  /// vector is reduced on each iteration.
1594  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1595  bool IsPairwise) {
1596  assert(Ty->isVectorTy() && "Expect a vector type");
1597  Type *ScalarTy = Ty->getVectorElementType();
1598  unsigned NumVecElts = Ty->getVectorNumElements();
1599  unsigned NumReduxLevels = Log2_32(NumVecElts);
1600  unsigned ArithCost = 0;
1601  unsigned ShuffleCost = 0;
1602  auto *ConcreteTTI = static_cast<T *>(this);
1603  std::pair<unsigned, MVT> LT =
1604  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1605  unsigned LongVectorCount = 0;
1606  unsigned MVTLen =
1607  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1608  while (NumVecElts > MVTLen) {
1609  NumVecElts /= 2;
1610  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1611  // Assume the pairwise shuffles add a cost.
1612  ShuffleCost += (IsPairwise + 1) *
1613  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1614  NumVecElts, SubTy);
1615  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1616  Ty = SubTy;
1617  ++LongVectorCount;
1618  }
1619 
1620  NumReduxLevels -= LongVectorCount;
1621 
1622  // The minimal length of the vector is limited by the real length of vector
1623  // operations performed on the current platform. That's why several final
1624  // reduction operations are performed on the vectors with the same
1625  // architecture-dependent length.
1626 
1627  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1628  // reductions need two shuffles on every level but the last one; on that
1629  // level, one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1630  unsigned NumShuffles = NumReduxLevels;
1631  if (IsPairwise && NumReduxLevels >= 1)
1632  NumShuffles += NumReduxLevels - 1;
1633  ShuffleCost += NumShuffles *
1634  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1635  0, Ty);
1636  ArithCost += NumReduxLevels *
1637  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1638  return ShuffleCost + ArithCost +
1639  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1640  }
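  // Editorial illustration (not part of the original header), assuming a
  // hypothetical target whose widest legal vector is <4 x float>: a
  // non-pairwise fadd reduction of <16 x float> is costed as two splitting
  // steps (<16 x float> -> <8 x float> -> <4 x float>, each charged one
  // extract-subvector shuffle plus one fadd on the narrower type), followed by
  // Log2_32(16) - 2 = 2 levels on the legal <4 x float> type (each charged one
  // single-source permute shuffle plus one fadd), plus a final extractelement
  // of lane 0. A derived TTI could query it as, for example:
  //   unsigned Cost = TTI.getArithmeticReductionCost(
  //       Instruction::FAdd, VectorType::get(FloatTy, 16), /*IsPairwise=*/false);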
1641 
1642  /// Try to calculate op costs for min/max reduction operations.
1643  /// \param CondTy Conditional type for the Select instruction.
1644  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1645  bool) {
1646  assert(Ty->isVectorTy() && "Expect a vector type");
1647  Type *ScalarTy = Ty->getVectorElementType();
1648  Type *ScalarCondTy = CondTy->getVectorElementType();
1649  unsigned NumVecElts = Ty->getVectorNumElements();
1650  unsigned NumReduxLevels = Log2_32(NumVecElts);
1651  unsigned CmpOpcode;
1652  if (Ty->isFPOrFPVectorTy()) {
1653  CmpOpcode = Instruction::FCmp;
1654  } else {
1655  assert(Ty->isIntOrIntVectorTy() &&
1656  "expecting floating point or integer type for min/max reduction");
1657  CmpOpcode = Instruction::ICmp;
1658  }
1659  unsigned MinMaxCost = 0;
1660  unsigned ShuffleCost = 0;
1661  auto *ConcreteTTI = static_cast<T *>(this);
1662  std::pair<unsigned, MVT> LT =
1663  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1664  unsigned LongVectorCount = 0;
1665  unsigned MVTLen =
1666  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1667  while (NumVecElts > MVTLen) {
1668  NumVecElts /= 2;
1669  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1670  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1671 
1672  // Assume the pairwise shuffles add a cost.
1673  ShuffleCost += (IsPairwise + 1) *
1674  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1675  NumVecElts, SubTy);
1676  MinMaxCost +=
1677  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1678  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1679  nullptr);
1680  Ty = SubTy;
1681  ++LongVectorCount;
1682  }
1683 
1684  NumReduxLevels -= LongVectorCount;
1685 
1686  // The minimal length of the vector is limited by the real length of vector
1687  // operations performed on the current platform. That's why several final
1688  // reduction operations are performed on the vectors with the same
1689  // architecture-dependent length.
1690 
1691  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1692  // reductions need two shuffles on every level but the last one; on that
1693  // level, one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1694  unsigned NumShuffles = NumReduxLevels;
1695  if (IsPairwise && NumReduxLevels >= 1)
1696  NumShuffles += NumReduxLevels - 1;
1697  ShuffleCost += NumShuffles *
1698  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1699  0, Ty);
1700  MinMaxCost +=
1701  NumReduxLevels *
1702  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1703  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1704  nullptr));
1705  // The last min/max should be in vector registers and we counted it above.
1706  // So just need a single extractelement.
1707  return ShuffleCost + MinMaxCost +
1708  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1709  }
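  // Editorial illustration (not part of the original header), assuming a
  // hypothetical target whose widest legal vector is <4 x i32>: a non-pairwise
  // smax reduction of <8 x i32> is costed as one splitting step (an
  // extract-subvector shuffle plus one icmp and one select on <4 x i32>),
  // then Log2_32(8) - 1 = 2 further levels on the legal <4 x i32> type (each
  // charged one single-source permute shuffle plus one icmp/select pair), and
  // finally a single extractelement of lane 0.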
1710 
1711  unsigned getVectorSplitCost() { return 1; }
1712 
1713  /// @}
1714 };
1715 
1716 /// Concrete BasicTTIImpl that can be used if no further customization
1717 /// is needed.
1718 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1719  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1720 
1721  friend class BasicTTIImplBase<BasicTTIImpl>;
1722 
1723  const TargetSubtargetInfo *ST;
1724  const TargetLoweringBase *TLI;
1725 
1726  const TargetSubtargetInfo *getST() const { return ST; }
1727  const TargetLoweringBase *getTLI() const { return TLI; }
1728 
1729 public:
1730  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1731 };
1732 
1733 } // end namespace llvm
1734 
1735 #endif // LLVM_CODEGEN_BASICTTIIMPL_H