1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides a helper that implements much of the TTI interface in
11 /// terms of the target-independent code generator and TargetLowering
12 /// interfaces.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
18 
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Analysis/TargetTransformInfoImpl.h"
27 #include "llvm/CodeGen/ISDOpcodes.h"
28 #include "llvm/CodeGen/TargetLowering.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallSite.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Operator.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/MC/MCSchedule.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <limits>
54 #include <utility>
55 
56 namespace llvm {
57 
58 class Function;
59 class GlobalValue;
60 class LLVMContext;
61 class ScalarEvolution;
62 class SCEV;
63 class TargetMachine;
64 
65 extern cl::opt<unsigned> PartialUnrollingThreshold;
66 
67 /// Base class which can be used to help build a TTI implementation.
68 ///
69 /// This class provides as much implementation of the TTI interface as is
70 /// possible using the target independent parts of the code generator.
71 ///
72 /// In order to subclass it, your class must implement a getST() method to
73 /// return the subtarget, and a getTLI() method to return the target lowering.
74 /// We need these methods implemented in the derived class so that this class
75 /// doesn't have to duplicate storage for them.
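///
/// For illustration, a derived implementation roughly follows this shape
/// (every name other than BasicTTIImplBase below is a placeholder for a
/// target's own types):
///
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     const MyTargetSubtarget *ST;
///     const MyTargetLowering *TLI;
///
///   public:
///     const MyTargetSubtarget *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };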
76 template <typename T>
77 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78 private:
79  using BaseT = TargetTransformInfoImplCRTPBase<T>;
80  using TTI = TargetTransformInfo;
81 
82  /// Estimate a cost of Broadcast as an extract and sequence of insert
83  /// operations.
84  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85  assert(Ty->isVectorTy() && "Can only shuffle vectors");
86  unsigned Cost = 0;
87  // Broadcast cost is equal to the cost of extracting the zero'th element
88  // plus the cost of inserting it into every element of the result vector.
89  Cost += static_cast<T *>(this)->getVectorInstrCost(
90  Instruction::ExtractElement, Ty, 0);
91 
92  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93  Cost += static_cast<T *>(this)->getVectorInstrCost(
94  Instruction::InsertElement, Ty, i);
95  }
96  return Cost;
97  }
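  // For example, broadcasting a <4 x float> is modeled as one extractelement
  // plus four insertelements; if the target reports a cost of 1 for each of
  // those, the estimate is 5.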
98 
99  /// Estimate a cost of shuffle as a sequence of extract and insert
100  /// operations.
101  unsigned getPermuteShuffleOverhead(Type *Ty) {
102  assert(Ty->isVectorTy() && "Can only shuffle vectors");
103  unsigned Cost = 0;
104  // Shuffle cost is equal to the cost of extracting each element from its
105  // source vector plus the cost of inserting them into the result vector.
106 
107  // e.g. <4 x float> with a mask of <0,5,2,7>: we need to extract from
108  // index 0 of the first vector, index 1 of the second vector, index 2 of
109  // the first vector and finally index 3 of the second vector, and insert
110  // them at indices <0,1,2,3> of the result vector.
111  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112  Cost += static_cast<T *>(this)
113  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114  Cost += static_cast<T *>(this)
115  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116  }
117  return Cost;
118  }
119 
120  /// Estimate a cost of subvector extraction as a sequence of extract and
121  /// insert operations.
122  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124  "Can only extract subvectors from vectors");
125  int NumSubElts = SubTy->getVectorNumElements();
126  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127  "SK_ExtractSubvector index out of range");
128 
129  unsigned Cost = 0;
130  // Subvector extraction cost is equal to the cost of extracting the elements
131  // from the source vector plus the cost of inserting them into the result
132  // vector type.
133  for (int i = 0; i != NumSubElts; ++i) {
134  Cost += static_cast<T *>(this)->getVectorInstrCost(
135  Instruction::ExtractElement, Ty, i + Index);
136  Cost += static_cast<T *>(this)->getVectorInstrCost(
137  Instruction::InsertElement, SubTy, i);
138  }
139  return Cost;
140  }
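  // For example, extracting the high <2 x i32> half (Index == 2) of a
  // <4 x i32> is modeled as extracting elements 2 and 3 of the source and
  // inserting them at indices 0 and 1 of the result subvector.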
141 
142  /// Estimate a cost of subvector insertion as a sequence of extract and
143  /// insert operations.
144  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146  "Can only insert subvectors into vectors");
147  int NumSubElts = SubTy->getVectorNumElements();
148  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149  "SK_InsertSubvector index out of range");
150 
151  unsigned Cost = 0;
152  // Subvector insertion cost is equal to the cost of extracting the elements
153  // from the subvector plus the cost of inserting them into the result
154  // vector type.
155  for (int i = 0; i != NumSubElts; ++i) {
156  Cost += static_cast<T *>(this)->getVectorInstrCost(
157  Instruction::ExtractElement, SubTy, i);
158  Cost += static_cast<T *>(this)->getVectorInstrCost(
159  Instruction::InsertElement, Ty, i + Index);
160  }
161  return Cost;
162  }
163 
164  /// Local query method delegates up to T which *must* implement this!
165  const TargetSubtargetInfo *getST() const {
166  return static_cast<const T *>(this)->getST();
167  }
168 
169  /// Local query method delegates up to T which *must* implement this!
170  const TargetLoweringBase *getTLI() const {
171  return static_cast<const T *>(this)->getTLI();
172  }
173 
174  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175  switch (M) {
176  case TTI::MIM_Unindexed:
177  return ISD::UNINDEXED;
178  case TTI::MIM_PreInc:
179  return ISD::PRE_INC;
180  case TTI::MIM_PreDec:
181  return ISD::PRE_DEC;
182  case TTI::MIM_PostInc:
183  return ISD::POST_INC;
184  case TTI::MIM_PostDec:
185  return ISD::POST_DEC;
186  }
187  llvm_unreachable("Unexpected MemIndexedMode");
188  }
189 
190 protected:
191  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192  : BaseT(DL) {}
193  virtual ~BasicTTIImplBase() = default;
194 
195  using TargetTransformInfoImplBase::DL;
196 public:
197  /// \name Scalar TTI Implementations
198  /// @{
199  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
200  unsigned AddressSpace, unsigned Alignment,
201  bool *Fast) const {
202  EVT E = EVT::getIntegerVT(Context, BitWidth);
203  return getTLI()->allowsMisalignedMemoryAccesses(
204  E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205  }
206 
207  bool hasBranchDivergence() { return false; }
208 
209  bool isSourceOfDivergence(const Value *V) { return false; }
210 
211  bool isAlwaysUniform(const Value *V) { return false; }
212 
213  unsigned getFlatAddressSpace() {
214  // Return an invalid address space.
215  return -1;
216  }
217 
218  bool isLegalAddImmediate(int64_t imm) {
219  return getTLI()->isLegalAddImmediate(imm);
220  }
221 
222  bool isLegalICmpImmediate(int64_t imm) {
223  return getTLI()->isLegalICmpImmediate(imm);
224  }
225 
226  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
227  bool HasBaseReg, int64_t Scale,
228  unsigned AddrSpace, Instruction *I = nullptr) {
229  TargetLoweringBase::AddrMode AM;
230  AM.BaseGV = BaseGV;
231  AM.BaseOffs = BaseOffset;
232  AM.HasBaseReg = HasBaseReg;
233  AM.Scale = Scale;
234  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235  }
236 
237  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
238  const DataLayout &DL) const {
239  EVT VT = getTLI()->getValueType(DL, Ty);
240  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241  }
242 
243  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
244  const DataLayout &DL) const {
245  EVT VT = getTLI()->getValueType(DL, Ty);
246  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247  }
248 
249  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251  }
252 
253  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
254  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255  TargetLoweringBase::AddrMode AM;
256  AM.BaseGV = BaseGV;
257  AM.BaseOffs = BaseOffset;
258  AM.HasBaseReg = HasBaseReg;
259  AM.Scale = Scale;
260  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261  }
262 
263  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264  return getTLI()->isTruncateFree(Ty1, Ty2);
265  }
266 
267  bool isProfitableToHoist(Instruction *I) {
268  return getTLI()->isProfitableToHoist(I);
269  }
270 
271  bool useAA() const { return getST()->useAA(); }
272 
273  bool isTypeLegal(Type *Ty) {
274  EVT VT = getTLI()->getValueType(DL, Ty);
275  return getTLI()->isTypeLegal(VT);
276  }
277 
278  int getGEPCost(Type *PointeeType, const Value *Ptr,
279  ArrayRef<const Value *> Operands) {
280  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281  }
282 
283  int getExtCost(const Instruction *I, const Value *Src) {
284  if (getTLI()->isExtFree(I))
285  return TargetTransformInfo::TCC_Free;
286 
287  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
288  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289  if (getTLI()->isExtLoad(LI, I, DL))
290  return TargetTransformInfo::TCC_Free;
291 
292  return TargetTransformInfo::TCC_Basic;
293  }
294 
295  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
296  ArrayRef<const Value *> Arguments, const User *U) {
297  return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
298  }
299 
300  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
301  ArrayRef<Type *> ParamTys, const User *U) {
302  if (IID == Intrinsic::cttz) {
303  if (getTLI()->isCheapToSpeculateCttz())
304  return TargetTransformInfo::TCC_Basic;
305  return TargetTransformInfo::TCC_Expensive;
306  }
307 
308  if (IID == Intrinsic::ctlz) {
309  if (getTLI()->isCheapToSpeculateCtlz())
310  return TargetTransformInfo::TCC_Basic;
311  return TargetTransformInfo::TCC_Expensive;
312  }
313 
314  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315  }
316 
317  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
318  unsigned &JumpTableSize) {
319  /// Try to find the estimated number of clusters. Note that the number of
320  /// clusters identified in this function could be different from the actual
321  /// numbers found in lowering. This function ignores switches that are
322  /// lowered with a mix of jump table / bit test / BTree. It was initially
323  /// intended for estimating the cost of a switch in the inline cost
324  /// heuristic, but it is a generic cost model that can be used in other
325  /// places (e.g., in loop unrolling).
326  unsigned N = SI.getNumCases();
327  const TargetLoweringBase *TLI = getTLI();
328  const DataLayout &DL = this->getDataLayout();
329 
330  JumpTableSize = 0;
331  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332 
333  // Early exit if both a jump table and bit test are not allowed.
334  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
335  return N;
336 
337  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338  APInt MinCaseVal = MaxCaseVal;
339  for (auto CI : SI.cases()) {
340  const APInt &CaseVal = CI.getCaseValue()->getValue();
341  if (CaseVal.sgt(MaxCaseVal))
342  MaxCaseVal = CaseVal;
343  if (CaseVal.slt(MinCaseVal))
344  MinCaseVal = CaseVal;
345  }
346 
347  // Check if suitable for a bit test
348  if (N <= DL.getIndexSizeInBits(0u)) {
349  SmallPtrSet<const BasicBlock *, 4> Dests;
350  for (auto I : SI.cases())
351  Dests.insert(I.getCaseSuccessor());
352 
353  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354  DL))
355  return 1;
356  }
357 
358  // Check if suitable for a jump table.
359  if (IsJTAllowed) {
360  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361  return N;
362  uint64_t Range =
363  (MaxCaseVal - MinCaseVal)
364  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365  // Check whether a range of clusters is dense enough for a jump table
366  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367  JumpTableSize = Range;
368  return 1;
369  }
370  }
371  return N;
372  }
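  // For example, a switch over the dense cases {0, 1, 2, 3} that satisfies
  // the jump-table check above is reported as a single cluster with
  // JumpTableSize == 4, while a sparse switch that fails both the bit-test
  // and jump-table checks is reported as one cluster per case.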
373 
374  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
375 
376  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
377 
378  bool shouldBuildLookupTables() {
379  const TargetLoweringBase *TLI = getTLI();
380  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
382  }
383 
384  bool haveFastSqrt(Type *Ty) {
385  const TargetLoweringBase *TLI = getTLI();
386  EVT VT = TLI->getValueType(DL, Ty);
387  return TLI->isTypeLegal(VT) &&
388  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389  }
390 
391  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392  return true;
393  }
394 
395  unsigned getFPOpCost(Type *Ty) {
396  // Check whether FADD is available, as a proxy for floating-point in
397  // general.
398  const TargetLoweringBase *TLI = getTLI();
399  EVT VT = TLI->getValueType(DL, Ty);
400  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401  return TargetTransformInfo::TCC_Basic;
402  return TargetTransformInfo::TCC_Expensive;
403  }
404 
405  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406  const TargetLoweringBase *TLI = getTLI();
407  switch (Opcode) {
408  default: break;
409  case Instruction::Trunc:
410  if (TLI->isTruncateFree(OpTy, Ty))
411  return TargetTransformInfo::TCC_Free;
412  return TargetTransformInfo::TCC_Basic;
413  case Instruction::ZExt:
414  if (TLI->isZExtFree(OpTy, Ty))
415  return TargetTransformInfo::TCC_Free;
416  break;
417 
418  case Instruction::AddrSpaceCast:
419  if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420  Ty->getPointerAddressSpace()))
421  return TargetTransformInfo::TCC_Free;
422  return TargetTransformInfo::TCC_Basic;
423  }
424 
425  return BaseT::getOperationCost(Opcode, Ty, OpTy);
426  }
427 
428  unsigned getInliningThresholdMultiplier() { return 1; }
429 
430  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
431  TTI::UnrollingPreferences &UP) {
432  // This unrolling functionality is target independent, but to provide some
433  // motivation for its intended use, for x86:
434 
435  // According to the Intel 64 and IA-32 Architectures Optimization Reference
436  // Manual, Intel Core models and later have a loop stream detector (and
437  // associated uop queue) that can benefit from partial unrolling.
438  // The relevant requirements are:
439  // - The loop must have no more than 4 (8 for Nehalem and later) branches
440  // taken, and none of them may be calls.
441  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
442 
443  // According to the Software Optimization Guide for AMD Family 15h
444  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
445  // and loop buffer which can benefit from partial unrolling.
446  // The relevant requirements are:
447  // - The loop must have fewer than 16 branches
448  // - The loop must have less than 40 uops in all executed loop branches
449 
450  // The number of taken branches in a loop is hard to estimate here, and
451  // benchmarking has revealed that it is better not to be conservative when
452  // estimating the branch count. As a result, we'll ignore the branch limits
453  // until someone finds a case where it matters in practice.
454 
455  unsigned MaxOps;
456  const TargetSubtargetInfo *ST = getST();
457  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
458  MaxOps = PartialUnrollingThreshold;
459  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
460  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
461  else
462  return;
463 
464  // Scan the loop: don't unroll loops with calls.
465  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
466  ++I) {
467  BasicBlock *BB = *I;
468 
469  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
470  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
471  ImmutableCallSite CS(&*J);
472  if (const Function *F = CS.getCalledFunction()) {
473  if (!static_cast<T *>(this)->isLoweredToCall(F))
474  continue;
475  }
476 
477  return;
478  }
479  }
480 
481  // Enable runtime and partial unrolling up to the specified size.
482  // Enable using trip count upper bound to unroll loops.
483  UP.Partial = UP.Runtime = UP.UpperBound = true;
484  UP.PartialThreshold = MaxOps;
485 
486  // Avoid unrolling when optimizing for size.
487  UP.OptSizeThreshold = 0;
488  UP.PartialOptSizeThreshold = 0;
489 
490  // Set number of instructions optimized when "back edge"
491  // becomes "fall through" to default value of 2.
492  UP.BEInsns = 2;
493  }
494 
495  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
496  AssumptionCache &AC,
497  TargetLibraryInfo *LibInfo,
498  HardwareLoopInfo &HWLoopInfo) {
499  return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
500  }
501 
502  unsigned getInstructionLatency(const Instruction *I) {
503  if (isa<LoadInst>(I))
504  return getST()->getSchedModel().DefaultLoadLatency;
505 
506  return BaseT::getInstructionLatency(I);
507  }
508 
509  /// @}
510 
511  /// \name Vector TTI Implementations
512  /// @{
513 
514  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
515 
516  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
517 
518  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
519  /// are set if the result needs to be inserted and/or extracted from vectors.
520  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
521  assert(Ty->isVectorTy() && "Can only scalarize vectors");
522  unsigned Cost = 0;
523 
524  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
525  if (Insert)
526  Cost += static_cast<T *>(this)
527  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
528  if (Extract)
529  Cost += static_cast<T *>(this)
530  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
531  }
532 
533  return Cost;
534  }
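  // For example, scalarizing a <4 x i32> value with both Insert and Extract
  // set is modeled as four insertelement plus four extractelement operations.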
535 
536  /// Estimate the overhead of scalarizing an instruction's unique
537  /// non-constant operands. The types of the arguments are ordinarily
538  /// scalar, in which case the costs are multiplied with VF.
539  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
540  unsigned VF) {
541  unsigned Cost = 0;
542  SmallPtrSet<const Value*, 4> UniqueOperands;
543  for (const Value *A : Args) {
544  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
545  Type *VecTy = nullptr;
546  if (A->getType()->isVectorTy()) {
547  VecTy = A->getType();
548  // If A is a vector operand, VF should be 1 or correspond to A.
549  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
550  "Vector argument does not match VF");
551  }
552  else
553  VecTy = VectorType::get(A->getType(), VF);
554 
555  Cost += getScalarizationOverhead(VecTy, false, true);
556  }
557  }
558 
559  return Cost;
560  }
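  // For example, with operands (%a, %a, 7) and VF == 4, only the single
  // unique non-constant operand %a contributes, as the cost of extracting
  // all four lanes of the <4 x ...> vector that would be built from it.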
561 
562  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
563  assert(VecTy->isVectorTy());
564 
565  unsigned Cost = 0;
566 
567  Cost += getScalarizationOverhead(VecTy, true, false);
568  if (!Args.empty())
569  Cost += getOperandsScalarizationOverhead(Args,
570  VecTy->getVectorNumElements());
571  else
572  // When no information on arguments is provided, we add the cost
573  // associated with one argument as a heuristic.
574  Cost += getScalarizationOverhead(VecTy, false, true);
575 
576  return Cost;
577  }
578 
579  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
580 
581  unsigned getArithmeticInstrCost(
582  unsigned Opcode, Type *Ty,
583  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
584  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
585  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
586  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
587  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
588  // Check if any of the operands are vector operands.
589  const TargetLoweringBase *TLI = getTLI();
590  int ISD = TLI->InstructionOpcodeToISD(Opcode);
591  assert(ISD && "Invalid opcode");
592 
593  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
594 
595  bool IsFloat = Ty->isFPOrFPVectorTy();
596  // Assume that floating point arithmetic operations cost twice as much as
597  // integer operations.
598  unsigned OpCost = (IsFloat ? 2 : 1);
599 
600  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
601  // The operation is legal. Assume it costs 1.
602  // TODO: Once we have extract/insert subvector cost we need to use them.
603  return LT.first * OpCost;
604  }
605 
606  if (!TLI->isOperationExpand(ISD, LT.second)) {
607  // If the operation is custom lowered, then assume that the code is twice
608  // as expensive.
609  return LT.first * 2 * OpCost;
610  }
611 
612  // Else, assume that we need to scalarize this op.
613  // TODO: If one of the types gets legalized by splitting, handle this
614  // similarly to what getCastInstrCost() does.
615  if (Ty->isVectorTy()) {
616  unsigned Num = Ty->getVectorNumElements();
617  unsigned Cost = static_cast<T *>(this)
618  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
619  // Return the cost of multiple scalar invocation plus the cost of
620  // inserting and extracting the values.
621  return getScalarizationOverhead(Ty, Args) + Num * Cost;
622  }
623 
624  // We don't know anything about this scalar instruction.
625  return OpCost;
626  }
627 
628  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
629  Type *SubTp) {
630  switch (Kind) {
631  case TTI::SK_Broadcast:
632  return getBroadcastShuffleOverhead(Tp);
633  case TTI::SK_Select:
634  case TTI::SK_Reverse:
635  case TTI::SK_Transpose:
636  case TTI::SK_PermuteSingleSrc:
637  case TTI::SK_PermuteTwoSrc:
638  return getPermuteShuffleOverhead(Tp);
639  case TTI::SK_ExtractSubvector:
640  return getExtractSubvectorOverhead(Tp, Index, SubTp);
641  case TTI::SK_InsertSubvector:
642  return getInsertSubvectorOverhead(Tp, Index, SubTp);
643  }
644  llvm_unreachable("Unknown TTI::ShuffleKind");
645  }
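  // Every shuffle kind is modeled above as a worst-case extract/insert
  // sequence; targets with native shuffle or permute instructions typically
  // override getShuffleCost to return cheaper estimates.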
646 
647  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
648  const Instruction *I = nullptr) {
649  const TargetLoweringBase *TLI = getTLI();
650  int ISD = TLI->InstructionOpcodeToISD(Opcode);
651  assert(ISD && "Invalid opcode");
652  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
653  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
654 
655  // Check for NOOP conversions.
656  if (SrcLT.first == DstLT.first &&
657  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
658 
659  // Bitcasts between types that are legalized to the same type are free.
660  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
661  return 0;
662  }
663 
664  if (Opcode == Instruction::Trunc &&
665  TLI->isTruncateFree(SrcLT.second, DstLT.second))
666  return 0;
667 
668  if (Opcode == Instruction::ZExt &&
669  TLI->isZExtFree(SrcLT.second, DstLT.second))
670  return 0;
671 
672  if (Opcode == Instruction::AddrSpaceCast &&
673  TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
674  Dst->getPointerAddressSpace()))
675  return 0;
676 
677  // If this is a zext/sext of a load, return 0 if the corresponding
678  // extending load exists on target.
679  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
680  I && isa<LoadInst>(I->getOperand(0))) {
681  EVT ExtVT = EVT::getEVT(Dst);
682  EVT LoadVT = EVT::getEVT(Src);
683  unsigned LType =
684  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
685  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
686  return 0;
687  }
688 
689  // If the cast is marked as legal (or promote) then assume low cost.
690  if (SrcLT.first == DstLT.first &&
691  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
692  return 1;
693 
694  // Handle scalar conversions.
695  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
696  // Scalar bitcasts are usually free.
697  if (Opcode == Instruction::BitCast)
698  return 0;
699 
700  // Just check the op cost. If the operation is legal then assume it costs
701  // 1.
702  if (!TLI->isOperationExpand(ISD, DstLT.second))
703  return 1;
704 
705  // Assume that illegal scalar instructions are expensive.
706  return 4;
707  }
708 
709  // Check vector-to-vector casts.
710  if (Dst->isVectorTy() && Src->isVectorTy()) {
711  // If the cast is between same-sized registers, then the check is simple.
712  if (SrcLT.first == DstLT.first &&
713  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
714 
715  // Assume that Zext is done using AND.
716  if (Opcode == Instruction::ZExt)
717  return 1;
718 
719  // Assume that sext is done using SHL and SRA.
720  if (Opcode == Instruction::SExt)
721  return 2;
722 
723  // Just check the op cost. If the operation is legal then assume it
724  // costs
725  // 1 and multiply by the type-legalization overhead.
726  if (!TLI->isOperationExpand(ISD, DstLT.second))
727  return SrcLT.first * 1;
728  }
729 
730  // If we are legalizing by splitting, query the concrete TTI for the cost
731  // of casting the original vector twice. We also need to factor in the
732  // cost of the split itself. Count that as 1, to be consistent with
733  // TLI->getTypeLegalizationCost().
734  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
735  TargetLowering::TypeSplitVector) ||
736  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
737  TargetLowering::TypeSplitVector)) {
738  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
739  Dst->getVectorNumElements() / 2);
740  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
741  Src->getVectorNumElements() / 2);
742  T *TTI = static_cast<T *>(this);
743  return TTI->getVectorSplitCost() +
744  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
745  }
746 
747  // In other cases where the source or destination are illegal, assume
748  // the operation will get scalarized.
749  unsigned Num = Dst->getVectorNumElements();
750  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
751  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
752 
753  // Return the cost of multiple scalar invocation plus the cost of
754  // inserting and extracting the values.
755  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
756  }
757 
758  // We already handled vector-to-vector and scalar-to-scalar conversions.
759  // This
760  // is where we handle bitcast between vectors and scalars. We need to assume
761  // that the conversion is scalarized in one way or another.
762  if (Opcode == Instruction::BitCast)
763  // Illegal bitcasts are done by storing and loading from a stack slot.
764  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
765  : 0) +
766  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
767  : 0);
768 
769  llvm_unreachable("Unhandled cast");
770  }
771 
772  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
773  VectorType *VecTy, unsigned Index) {
774  return static_cast<T *>(this)->getVectorInstrCost(
775  Instruction::ExtractElement, VecTy, Index) +
776  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
777  VecTy->getElementType());
778  }
779 
780  unsigned getCFInstrCost(unsigned Opcode) {
781  // Branches are assumed to be predicted.
782  return 0;
783  }
784 
785  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
786  const Instruction *I) {
787  const TargetLoweringBase *TLI = getTLI();
788  int ISD = TLI->InstructionOpcodeToISD(Opcode);
789  assert(ISD && "Invalid opcode");
790 
791  // Selects on vectors are actually vector selects.
792  if (ISD == ISD::SELECT) {
793  assert(CondTy && "CondTy must exist");
794  if (CondTy->isVectorTy())
795  ISD = ISD::VSELECT;
796  }
797  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
798 
799  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
800  !TLI->isOperationExpand(ISD, LT.second)) {
801  // The operation is legal. Assume it costs 1. Multiply
802  // by the type-legalization overhead.
803  return LT.first * 1;
804  }
805 
806  // Otherwise, assume that the cast is scalarized.
807  // TODO: If one of the types gets legalized by splitting, handle this
808  // similarly to what getCastInstrCost() does.
809  if (ValTy->isVectorTy()) {
810  unsigned Num = ValTy->getVectorNumElements();
811  if (CondTy)
812  CondTy = CondTy->getScalarType();
813  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
814  Opcode, ValTy->getScalarType(), CondTy, I);
815 
816  // Return the cost of multiple scalar invocation plus the cost of
817  // inserting and extracting the values.
818  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
819  }
820 
821  // Unknown scalar opcode.
822  return 1;
823  }
824 
825  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
826  std::pair<unsigned, MVT> LT =
827  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
828 
829  return LT.first;
830  }
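  // The legalization count of the element type is used as a rough proxy: a
  // single insert/extract whose scalar element type is legal is assumed to
  // cost 1.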
831 
832  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
833  unsigned AddressSpace, const Instruction *I = nullptr) {
834  assert(!Src->isVoidTy() && "Invalid type");
835  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
836 
837  // Assuming that all loads of legal types cost 1.
838  unsigned Cost = LT.first;
839 
840  if (Src->isVectorTy() &&
841  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
842  // This is a vector load that legalizes to a larger type than the vector
843  // itself. Unless the corresponding extending load or truncating store is
844  // legal, then this will scalarize.
845  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
846  EVT MemVT = getTLI()->getValueType(DL, Src);
847  if (Opcode == Instruction::Store)
848  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
849  else
850  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
851 
852  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
853  // This is a vector load/store for some illegal type that is scalarized.
854  // We must account for the cost of building or decomposing the vector.
855  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
856  Opcode == Instruction::Store);
857  }
858  }
859 
860  return Cost;
861  }
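  // For example, a load of <4 x i8> that legalizes to a wider vector type
  // without a matching extending load is costed as the legalized load plus
  // the four insertelements needed to rebuild the <4 x i8> value.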
862 
863  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
864  unsigned Factor,
865  ArrayRef<unsigned> Indices,
866  unsigned Alignment, unsigned AddressSpace,
867  bool UseMaskForCond = false,
868  bool UseMaskForGaps = false) {
869  VectorType *VT = dyn_cast<VectorType>(VecTy);
870  assert(VT && "Expect a vector type for interleaved memory op");
871 
872  unsigned NumElts = VT->getNumElements();
873  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
874 
875  unsigned NumSubElts = NumElts / Factor;
876  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
877 
878  // Firstly, the cost of load/store operation.
879  unsigned Cost;
880  if (UseMaskForCond || UseMaskForGaps)
881  Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
882  Opcode, VecTy, Alignment, AddressSpace);
883  else
884  Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
885  AddressSpace);
886 
887  // Legalize the vector type, and get the legalized and unlegalized type
888  // sizes.
889  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
890  unsigned VecTySize =
891  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
892  unsigned VecTyLTSize = VecTyLT.getStoreSize();
893 
894  // Return the ceiling of dividing A by B.
895  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
896 
897  // Scale the cost of the memory operation by the fraction of legalized
898  // instructions that will actually be used. We shouldn't account for the
899  // cost of dead instructions since they will be removed.
900  //
901  // E.g., An interleaved load of factor 8:
902  // %vec = load <16 x i64>, <16 x i64>* %ptr
903  // %v0 = shufflevector %vec, undef, <0, 8>
904  //
905  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
906  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
907  // type). The other loads are unused.
908  //
909  // We only scale the cost of loads since interleaved store groups aren't
910  // allowed to have gaps.
911  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
912  // The number of loads of a legal type it will take to represent a load
913  // of the unlegalized vector type.
914  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
915 
916  // The number of elements of the unlegalized type that correspond to a
917  // single legal instruction.
918  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
919 
920  // Determine which legal instructions will be used.
921  BitVector UsedInsts(NumLegalInsts, false);
922  for (unsigned Index : Indices)
923  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
924  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
925 
926  // Scale the cost of the load by the fraction of legal instructions that
927  // will be used.
928  Cost *= UsedInsts.count() / NumLegalInsts;
929  }
930 
931  // Then add the cost of the interleave operation.
932  if (Opcode == Instruction::Load) {
933  // The interleave cost is similar to extract sub vectors' elements
934  // from the wide vector, and insert them into sub vectors.
935  //
936  // E.g. An interleaved load of factor 2 (with one member of index 0):
937  // %vec = load <8 x i32>, <8 x i32>* %ptr
938  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
939  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
940  // <8 x i32> vector and insert them into a <4 x i32> vector.
941 
942  assert(Indices.size() <= Factor &&
943  "Interleaved memory op has too many members");
944 
945  for (unsigned Index : Indices) {
946  assert(Index < Factor && "Invalid index for interleaved memory op");
947 
948  // Extract elements from loaded vector for each sub vector.
949  for (unsigned i = 0; i < NumSubElts; i++)
950  Cost += static_cast<T *>(this)->getVectorInstrCost(
951  Instruction::ExtractElement, VT, Index + i * Factor);
952  }
953 
954  unsigned InsSubCost = 0;
955  for (unsigned i = 0; i < NumSubElts; i++)
956  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
957  Instruction::InsertElement, SubVT, i);
958 
959  Cost += Indices.size() * InsSubCost;
960  } else {
961  // The interleave cost is extract all elements from sub vectors, and
962  // insert them into the wide vector.
963  //
964  // E.g. An interleaved store of factor 2:
965  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
966  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
967  // The cost is estimated as extract all elements from both <4 x i32>
968  // vectors and insert into the <8 x i32> vector.
969 
970  unsigned ExtSubCost = 0;
971  for (unsigned i = 0; i < NumSubElts; i++)
972  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
973  Instruction::ExtractElement, SubVT, i);
974  Cost += ExtSubCost * Factor;
975 
976  for (unsigned i = 0; i < NumElts; i++)
977  Cost += static_cast<T *>(this)
978  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
979  }
980 
981  if (!UseMaskForCond)
982  return Cost;
983 
984  Type *I8Type = Type::getInt8Ty(VT->getContext());
985  VectorType *MaskVT = VectorType::get(I8Type, NumElts);
986  SubVT = VectorType::get(I8Type, NumSubElts);
987 
988  // The Mask shuffling cost is extract all the elements of the Mask
989  // and insert each of them Factor times into the wide vector:
990  //
991  // E.g. an interleaved group with factor 3:
992  // %mask = icmp ult <8 x i32> %vec1, %vec2
993  // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
994  // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
995  // The cost is estimated as extract all mask elements from the <8xi1> mask
996  // vector and insert them factor times into the <24xi1> shuffled mask
997  // vector.
998  for (unsigned i = 0; i < NumSubElts; i++)
999  Cost += static_cast<T *>(this)->getVectorInstrCost(
1000  Instruction::ExtractElement, SubVT, i);
1001 
1002  for (unsigned i = 0; i < NumElts; i++)
1003  Cost += static_cast<T *>(this)->getVectorInstrCost(
1004  Instruction::InsertElement, MaskVT, i);
1005 
1006  // The Gaps mask is invariant and created outside the loop, therefore the
1007  // cost of creating it is not accounted for here. However if we have both
1008  // a MaskForGaps and some other mask that guards the execution of the
1009  // memory access, we need to account for the cost of And-ing the two masks
1010  // inside the loop.
1011  if (UseMaskForGaps)
1012  Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1013  BinaryOperator::And, MaskVT);
1014 
1015  return Cost;
1016  }
1017 
1018  /// Get intrinsic cost based on arguments.
1019  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1020  ArrayRef<Value *> Args, FastMathFlags FMF,
1021  unsigned VF = 1) {
1022  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1023  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1024  auto *ConcreteTTI = static_cast<T *>(this);
1025 
1026  switch (IID) {
1027  default: {
1028  // Assume that we need to scalarize this intrinsic.
1029  SmallVector<Type *, 4> Types;
1030  for (Value *Op : Args) {
1031  Type *OpTy = Op->getType();
1032  assert(VF == 1 || !OpTy->isVectorTy());
1033  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1034  }
1035 
1036  if (VF > 1 && !RetTy->isVoidTy())
1037  RetTy = VectorType::get(RetTy, VF);
1038 
1039  // Compute the scalarization overhead based on Args for a vector
1040  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1041  // CostModel will pass a vector RetTy and VF is 1.
1042  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1043  if (RetVF > 1 || VF > 1) {
1044  ScalarizationCost = 0;
1045  if (!RetTy->isVoidTy())
1046  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1047  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1048  }
1049 
1050  return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1051  ScalarizationCost);
1052  }
1053  case Intrinsic::masked_scatter: {
1054  assert(VF == 1 && "Can't vectorize types here.");
1055  Value *Mask = Args[3];
1056  bool VarMask = !isa<Constant>(Mask);
1057  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1058  return ConcreteTTI->getGatherScatterOpCost(
1059  Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1060  }
1061  case Intrinsic::masked_gather: {
1062  assert(VF == 1 && "Can't vectorize types here.");
1063  Value *Mask = Args[2];
1064  bool VarMask = !isa<Constant>(Mask);
1065  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1066  return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1067  Args[0], VarMask, Alignment);
1068  }
1069  case Intrinsic::experimental_vector_reduce_add:
1070  case Intrinsic::experimental_vector_reduce_mul:
1071  case Intrinsic::experimental_vector_reduce_and:
1072  case Intrinsic::experimental_vector_reduce_or:
1073  case Intrinsic::experimental_vector_reduce_xor:
1074  case Intrinsic::experimental_vector_reduce_v2_fadd:
1075  case Intrinsic::experimental_vector_reduce_v2_fmul:
1076  case Intrinsic::experimental_vector_reduce_smax:
1077  case Intrinsic::experimental_vector_reduce_smin:
1078  case Intrinsic::experimental_vector_reduce_fmax:
1079  case Intrinsic::experimental_vector_reduce_fmin:
1080  case Intrinsic::experimental_vector_reduce_umax:
1081  case Intrinsic::experimental_vector_reduce_umin:
1082  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1083  case Intrinsic::fshl:
1084  case Intrinsic::fshr: {
1085  Value *X = Args[0];
1086  Value *Y = Args[1];
1087  Value *Z = Args[2];
1088  TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1089  TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1090  TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1091  TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1092  TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1093  OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1094  : TTI::OP_None;
1095  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1096  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1097  unsigned Cost = 0;
1098  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1099  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1100  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1101  OpKindX, OpKindZ, OpPropsX);
1102  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1103  OpKindY, OpKindZ, OpPropsY);
1104  // Non-constant shift amounts require a modulo.
1105  if (OpKindZ != TTI::OK_UniformConstantValue &&
1106  OpKindZ != TTI::OK_NonUniformConstantValue)
1107  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1108  OpKindZ, OpKindBW, OpPropsZ,
1109  OpPropsBW);
1110  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1111  if (X != Y) {
1112  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1113  if (RetVF > 1)
1114  CondTy = VectorType::get(CondTy, RetVF);
1115  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1116  CondTy, nullptr);
1117  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1118  CondTy, nullptr);
1119  }
1120  return Cost;
1121  }
1122  }
1123  }
1124 
1125  /// Get intrinsic cost based on argument types.
1126  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1127  /// cost of scalarizing the arguments and the return value will be computed
1128  /// based on types.
1129  unsigned getIntrinsicInstrCost(
1130  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1131  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1132  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1133  auto *ConcreteTTI = static_cast<T *>(this);
1134 
1135  SmallVector<unsigned, 2> ISDs;
1136  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1137  switch (IID) {
1138  default: {
1139  // Assume that we need to scalarize this intrinsic.
1140  unsigned ScalarizationCost = ScalarizationCostPassed;
1141  unsigned ScalarCalls = 1;
1142  Type *ScalarRetTy = RetTy;
1143  if (RetTy->isVectorTy()) {
1144  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1145  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1146  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1147  ScalarRetTy = RetTy->getScalarType();
1148  }
1149  SmallVector<Type *, 4> ScalarTys;
1150  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1151  Type *Ty = Tys[i];
1152  if (Ty->isVectorTy()) {
1153  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1154  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1155  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1156  Ty = Ty->getScalarType();
1157  }
1158  ScalarTys.push_back(Ty);
1159  }
1160  if (ScalarCalls == 1)
1161  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1162 
1163  unsigned ScalarCost =
1164  ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1165 
1166  return ScalarCalls * ScalarCost + ScalarizationCost;
1167  }
1168  // Look for intrinsics that can be lowered directly or turned into a scalar
1169  // intrinsic call.
1170  case Intrinsic::sqrt:
1171  ISDs.push_back(ISD::FSQRT);
1172  break;
1173  case Intrinsic::sin:
1174  ISDs.push_back(ISD::FSIN);
1175  break;
1176  case Intrinsic::cos:
1177  ISDs.push_back(ISD::FCOS);
1178  break;
1179  case Intrinsic::exp:
1180  ISDs.push_back(ISD::FEXP);
1181  break;
1182  case Intrinsic::exp2:
1183  ISDs.push_back(ISD::FEXP2);
1184  break;
1185  case Intrinsic::log:
1186  ISDs.push_back(ISD::FLOG);
1187  break;
1188  case Intrinsic::log10:
1189  ISDs.push_back(ISD::FLOG10);
1190  break;
1191  case Intrinsic::log2:
1192  ISDs.push_back(ISD::FLOG2);
1193  break;
1194  case Intrinsic::fabs:
1195  ISDs.push_back(ISD::FABS);
1196  break;
1197  case Intrinsic::canonicalize:
1198  ISDs.push_back(ISD::FCANONICALIZE);
1199  break;
1200  case Intrinsic::minnum:
1201  ISDs.push_back(ISD::FMINNUM);
1202  if (FMF.noNaNs())
1203  ISDs.push_back(ISD::FMINIMUM);
1204  break;
1205  case Intrinsic::maxnum:
1206  ISDs.push_back(ISD::FMAXNUM);
1207  if (FMF.noNaNs())
1208  ISDs.push_back(ISD::FMAXIMUM);
1209  break;
1210  case Intrinsic::copysign:
1211  ISDs.push_back(ISD::FCOPYSIGN);
1212  break;
1213  case Intrinsic::floor:
1214  ISDs.push_back(ISD::FFLOOR);
1215  break;
1216  case Intrinsic::ceil:
1217  ISDs.push_back(ISD::FCEIL);
1218  break;
1219  case Intrinsic::trunc:
1220  ISDs.push_back(ISD::FTRUNC);
1221  break;
1222  case Intrinsic::nearbyint:
1223  ISDs.push_back(ISD::FNEARBYINT);
1224  break;
1225  case Intrinsic::rint:
1226  ISDs.push_back(ISD::FRINT);
1227  break;
1228  case Intrinsic::round:
1229  ISDs.push_back(ISD::FROUND);
1230  break;
1231  case Intrinsic::pow:
1232  ISDs.push_back(ISD::FPOW);
1233  break;
1234  case Intrinsic::fma:
1235  ISDs.push_back(ISD::FMA);
1236  break;
1237  case Intrinsic::fmuladd:
1238  ISDs.push_back(ISD::FMA);
1239  break;
1240  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1241  case Intrinsic::lifetime_start:
1242  case Intrinsic::lifetime_end:
1243  case Intrinsic::sideeffect:
1244  return 0;
1245  case Intrinsic::masked_store:
1246  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1247  0);
1248  case Intrinsic::masked_load:
1249  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1250  case Intrinsic::experimental_vector_reduce_add:
1251  return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1252  /*IsPairwiseForm=*/false);
1253  case Intrinsic::experimental_vector_reduce_mul:
1254  return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1255  /*IsPairwiseForm=*/false);
1256  case Intrinsic::experimental_vector_reduce_and:
1257  return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1258  /*IsPairwiseForm=*/false);
1259  case Intrinsic::experimental_vector_reduce_or:
1260  return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1261  /*IsPairwiseForm=*/false);
1262  case Intrinsic::experimental_vector_reduce_xor:
1263  return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1264  /*IsPairwiseForm=*/false);
1265  case Intrinsic::experimental_vector_reduce_v2_fadd:
1266  return ConcreteTTI->getArithmeticReductionCost(
1267  Instruction::FAdd, Tys[0],
1268  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1269  // reductions.
1270  case Intrinsic::experimental_vector_reduce_v2_fmul:
1271  return ConcreteTTI->getArithmeticReductionCost(
1272  Instruction::FMul, Tys[0],
1273  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1274  // reductions.
1275  case Intrinsic::experimental_vector_reduce_smax:
1276  case Intrinsic::experimental_vector_reduce_smin:
1277  case Intrinsic::experimental_vector_reduce_fmax:
1278  case Intrinsic::experimental_vector_reduce_fmin:
1279  return ConcreteTTI->getMinMaxReductionCost(
1280  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1281  /*IsUnsigned=*/false);
1282  case Intrinsic::experimental_vector_reduce_umax:
1283  case Intrinsic::experimental_vector_reduce_umin:
1284  return ConcreteTTI->getMinMaxReductionCost(
1285  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1286  /*IsUnsigned=*/true);
1287  case Intrinsic::sadd_sat:
1288  case Intrinsic::ssub_sat: {
1289  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1290  if (RetVF > 1)
1291  CondTy = VectorType::get(CondTy, RetVF);
1292 
1293  Type *OpTy = StructType::create({RetTy, CondTy});
1294  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1295  ? Intrinsic::sadd_with_overflow
1296  : Intrinsic::ssub_with_overflow;
1297 
1298  // SatMax -> Overflow && SumDiff < 0
1299  // SatMin -> Overflow && SumDiff >= 0
1300  unsigned Cost = 0;
1301  Cost += ConcreteTTI->getIntrinsicInstrCost(
1302  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1303  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1304  CondTy, nullptr);
1305  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1306  CondTy, nullptr);
1307  return Cost;
1308  }
1309  case Intrinsic::uadd_sat:
1310  case Intrinsic::usub_sat: {
1311  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1312  if (RetVF > 1)
1313  CondTy = VectorType::get(CondTy, RetVF);
1314 
1315  Type *OpTy = StructType::create({RetTy, CondTy});
1316  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1317  ? Intrinsic::uadd_with_overflow
1318  : Intrinsic::usub_with_overflow;
1319 
1320  unsigned Cost = 0;
1321  Cost += ConcreteTTI->getIntrinsicInstrCost(
1322  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1323  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1324  CondTy, nullptr);
1325  return Cost;
1326  }
1327  case Intrinsic::smul_fix:
1328  case Intrinsic::umul_fix: {
1329  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1330  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1331  if (RetVF > 1)
1332  ExtTy = VectorType::get(ExtTy, RetVF);
1333 
1334  unsigned ExtOp =
1335  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1336 
1337  unsigned Cost = 0;
1338  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1339  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1340  Cost +=
1341  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1342  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1343  TTI::OK_AnyValue,
1344  TTI::OK_UniformConstantValue);
1345  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1346  TTI::OK_AnyValue,
1347  TTI::OK_UniformConstantValue);
1348  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1349  return Cost;
1350  }
1351  case Intrinsic::sadd_with_overflow:
1352  case Intrinsic::ssub_with_overflow: {
1353  Type *SumTy = RetTy->getContainedType(0);
1354  Type *OverflowTy = RetTy->getContainedType(1);
1355  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1356  ? BinaryOperator::Add
1357  : BinaryOperator::Sub;
1358 
1359  // LHSSign -> LHS >= 0
1360  // RHSSign -> RHS >= 0
1361  // SumSign -> Sum >= 0
1362  //
1363  // Add:
1364  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1365  // Sub:
1366  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1367  unsigned Cost = 0;
1368  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1369  Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1370  OverflowTy, nullptr);
1371  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1372  BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1373  Cost +=
1374  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1375  return Cost;
1376  }
1377  case Intrinsic::uadd_with_overflow:
1378  case Intrinsic::usub_with_overflow: {
1379  Type *SumTy = RetTy->getContainedType(0);
1380  Type *OverflowTy = RetTy->getContainedType(1);
1381  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1382  ? BinaryOperator::Add
1383  : BinaryOperator::Sub;
1384 
1385  unsigned Cost = 0;
1386  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1387  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1388  OverflowTy, nullptr);
1389  return Cost;
1390  }
1391  case Intrinsic::smul_with_overflow:
1392  case Intrinsic::umul_with_overflow: {
1393  Type *MulTy = RetTy->getContainedType(0);
1394  Type *OverflowTy = RetTy->getContainedType(1);
1395  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1396  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1397  if (MulTy->isVectorTy())
1398  ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
1399 
1400  unsigned ExtOp =
1401  IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
1402 
1403  unsigned Cost = 0;
1404  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1405  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1406  Cost +=
1407  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1408  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1409  TTI::OK_AnyValue,
1410  TTI::OK_UniformConstantValue);
1411 
1412  if (IID == Intrinsic::smul_with_overflow)
1413  Cost += ConcreteTTI->getArithmeticInstrCost(
1414  Instruction::AShr, MulTy, TTI::OK_AnyValue,
1415  TTI::OK_UniformConstantValue);
1416 
1417  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1418  OverflowTy, nullptr);
1419  return Cost;
1420  }
1421  case Intrinsic::ctpop:
1422  ISDs.push_back(ISD::CTPOP);
1423  // In case of legalization use TCC_Expensive. This is cheaper than a
1424  // library call but still not a cheap instruction.
1425  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1426  break;
1427  // FIXME: ctlz, cttz, ...
1428  }
1429 
1430  const TargetLoweringBase *TLI = getTLI();
1431  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1432 
1433  SmallVector<unsigned, 2> LegalCost;
1434  SmallVector<unsigned, 2> CustomCost;
1435  for (unsigned ISD : ISDs) {
1436  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1437  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1438  TLI->isFAbsFree(LT.second)) {
1439  return 0;
1440  }
1441 
1442  // The operation is legal. Assume it costs 1.
1443  // If the type is split to multiple registers, assume that there is some
1444  // overhead to this.
1445  // TODO: Once we have extract/insert subvector cost we need to use them.
1446  if (LT.first > 1)
1447  LegalCost.push_back(LT.first * 2);
1448  else
1449  LegalCost.push_back(LT.first * 1);
1450  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1451  // If the operation is custom lowered then assume
1452  // that the code is twice as expensive.
1453  CustomCost.push_back(LT.first * 2);
1454  }
1455  }
1456 
1457  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1458  if (MinLegalCostI != LegalCost.end())
1459  return *MinLegalCostI;
1460 
1461  auto MinCustomCostI =
1462  std::min_element(CustomCost.begin(), CustomCost.end());
1463  if (MinCustomCostI != CustomCost.end())
1464  return *MinCustomCostI;
1465 
1466  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1467  // point mul followed by an add.
1468  if (IID == Intrinsic::fmuladd)
1469  return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1470  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1471 
1472  // Else, assume that we need to scalarize this intrinsic. For math builtins
1473  // this will emit a costly libcall, adding call overhead and spills. Make it
1474  // very expensive.
1475  if (RetTy->isVectorTy()) {
1476  unsigned ScalarizationCost =
1477  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1478  ? ScalarizationCostPassed
1479  : getScalarizationOverhead(RetTy, true, false));
1480  unsigned ScalarCalls = RetTy->getVectorNumElements();
1481  SmallVector<Type *, 4> ScalarTys;
1482  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1483  Type *Ty = Tys[i];
1484  if (Ty->isVectorTy())
1485  Ty = Ty->getScalarType();
1486  ScalarTys.push_back(Ty);
1487  }
1488  unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1489  IID, RetTy->getScalarType(), ScalarTys, FMF);
1490  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1491  if (Tys[i]->isVectorTy()) {
1492  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1493  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1494  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1495  }
1496  }
1497 
1498  return ScalarCalls * ScalarCost + ScalarizationCost;
1499  }
1500 
1501  // This is going to be turned into a library call, make it expensive.
1502  return SingleCallCost;
1503  }
1504 
1505  /// Compute a cost of the given call instruction.
1506  ///
1507  /// Compute the cost of calling function F with return type RetTy and
1508  /// argument types Tys. F might be nullptr, in this case the cost of an
1509  /// arbitrary call with the specified signature will be returned.
1510  /// This is used, for instance, when we estimate call of a vector
1511  /// counterpart of the given function.
1512  /// \param F Called function, might be nullptr.
1513  /// \param RetTy Return value types.
1514  /// \param Tys Argument types.
1515  /// \returns The cost of Call instruction.
1516  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1517  return 10;
1518  }
1519 
1520  unsigned getNumberOfParts(Type *Tp) {
1521  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1522  return LT.first;
1523  }
1524 
1525  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1526  const SCEV *) {
1527  return 0;
1528  }
1529 
1530  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1531  /// We're assuming that reduction operations are performed in the following way:
1532  /// 1. Non-pairwise reduction
1533  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1534  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1535  /// \----------------v-------------/ \----------v------------/
1536  /// n/2 elements n/2 elements
1537  /// %red1 = op <n x t> %val, <n x t> val1
1538  /// After this operation we have a vector %red1 where only the first n/2
1539  /// elements are meaningful, the second n/2 elements are undefined and can be
1540  /// dropped. All other operations are actually working with the vector of
1541  /// length n/2, not n, though the real vector length is still n.
1542  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1543  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1544  /// \----------------v-------------/ \----------v------------/
1545  /// n/4 elements 3*n/4 elements
1546  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1547  /// length n/2, the resulting vector has length n/4 etc.
1548  /// 2. Pairwise reduction:
1549  /// Everything is the same except for an additional shuffle operation which
1550  /// is used to produce operands for pairwise kind of reductions.
1551  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1552  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1553  /// \-------------v----------/ \----------v------------/
1554  /// n/2 elements n/2 elements
1555  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1556  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1557  /// \-------------v----------/ \----------v------------/
1558  /// n/2 elements n/2 elements
1559  /// %red1 = op <n x t> %val1, <n x t> %val2
1560  /// Again, the operation is performed on <n x t> vector, but the resulting
1561  /// vector %red1 is <n/2 x t> vector.
1562  ///
1563  /// The cost model should take into account that the actual length of the
1564  /// vector is reduced on each iteration; a worked cost example follows this function.
1565  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1566  bool IsPairwise) {
1567  assert(Ty->isVectorTy() && "Expect a vector type");
1568  Type *ScalarTy = Ty->getVectorElementType();
1569  unsigned NumVecElts = Ty->getVectorNumElements();
1570  unsigned NumReduxLevels = Log2_32(NumVecElts);
1571  unsigned ArithCost = 0;
1572  unsigned ShuffleCost = 0;
1573  auto *ConcreteTTI = static_cast<T *>(this);
1574  std::pair<unsigned, MVT> LT =
1575  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1576  unsigned LongVectorCount = 0;
1577  unsigned MVTLen =
1578  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1579  while (NumVecElts > MVTLen) {
1580  NumVecElts /= 2;
1581  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1582  // Assume the pairwise shuffles add a cost.
1583  ShuffleCost += (IsPairwise + 1) *
1584  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1585  NumVecElts, SubTy);
1586  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1587  Ty = SubTy;
1588  ++LongVectorCount;
1589  }
1590 
1591  NumReduxLevels -= LongVectorCount;
1592 
1593  // The minimal length of the vector is limited by the real length of vector
1594  // operations performed on the current platform. That's why several final
1595  // reduction operations are performed on the vectors with the same
1596  // architecture-dependent length.
1597 
1598  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1599  // reductions need two shuffles on every level but the last one. On that
1600  // level one of the shuffles is <0, u, u, ...>, which is the identity.
1601  unsigned NumShuffles = NumReduxLevels;
1602  if (IsPairwise && NumReduxLevels >= 1)
1603  NumShuffles += NumReduxLevels - 1;
1604  ShuffleCost += NumShuffles *
1605  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1606  0, Ty);
1607  ArithCost += NumReduxLevels *
1608  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1609  return ShuffleCost + ArithCost +
1610  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1611  }
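To make the composition above concrete, here is a small standalone sketch, assuming a non-pairwise reduction and invented unit costs of 1 for each shuffle, arithmetic op, and extractelement (real numbers come from the target's hooks). For NumVecElts = 8 and a widest legal vector of 4 lanes it yields one split step, two in-register levels, and the final extract: 7 units.

// Illustrative sketch only: composes the reduction cost the way the loop above
// does, with invented unit costs. LegalWidth stands in for the number of lanes
// in the widest legal vector type on the target.
unsigned reductionCostSketch(unsigned NumVecElts, unsigned LegalWidth) {
  unsigned Levels = 0;
  for (unsigned N = NumVecElts; N > 1; N /= 2) // log2(NumVecElts) halving levels
    ++Levels;
  unsigned Cost = 0;
  // Split steps: halve the vector until it fits a legal register; each step is
  // one extract-subvector shuffle plus one arithmetic op on the narrower type.
  while (NumVecElts > LegalWidth) {
    NumVecElts /= 2;
    Cost += 1 /*shuffle*/ + 1 /*arith*/;
    --Levels;
  }
  // Remaining in-register levels: one permute shuffle plus one arithmetic op each.
  Cost += Levels * (1 /*shuffle*/ + 1 /*arith*/);
  // Finally extract the scalar result from lane 0.
  return Cost + 1 /*extractelement*/;
}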
1612 
1613  /// Try to calculate op costs for min/max reduction operations.
1614  /// \param CondTy Conditional type for the Select instruction.
1615  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1616  bool) {
1617  assert(Ty->isVectorTy() && "Expect a vector type");
1618  Type *ScalarTy = Ty->getVectorElementType();
1619  Type *ScalarCondTy = CondTy->getVectorElementType();
1620  unsigned NumVecElts = Ty->getVectorNumElements();
1621  unsigned NumReduxLevels = Log2_32(NumVecElts);
1622  unsigned CmpOpcode;
1623  if (Ty->isFPOrFPVectorTy()) {
1624  CmpOpcode = Instruction::FCmp;
1625  } else {
1626  assert(Ty->isIntOrIntVectorTy() &&
1627  "expecting floating point or integer type for min/max reduction");
1628  CmpOpcode = Instruction::ICmp;
1629  }
1630  unsigned MinMaxCost = 0;
1631  unsigned ShuffleCost = 0;
1632  auto *ConcreteTTI = static_cast<T *>(this);
1633  std::pair<unsigned, MVT> LT =
1634  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1635  unsigned LongVectorCount = 0;
1636  unsigned MVTLen =
1637  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1638  while (NumVecElts > MVTLen) {
1639  NumVecElts /= 2;
1640  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1641  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1642 
1643  // Assume the pairwise shuffles add a cost.
1644  ShuffleCost += (IsPairwise + 1) *
1645  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1646  NumVecElts, SubTy);
1647  MinMaxCost +=
1648  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1649  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1650  nullptr);
1651  Ty = SubTy;
1652  ++LongVectorCount;
1653  }
1654 
1655  NumReduxLevels -= LongVectorCount;
1656 
1657  // The minimal length of the vector is limited by the real length of vector
1658  // operations performed on the current platform. That's why several final
1659  // reduction operations are performed on the vectors with the same
1660  // architecture-dependent length.
1661 
1662  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1663  // reductions need two shuffles on every level but the last one. On that
1664  // level one of the shuffles is <0, u, u, ...>, which is the identity.
1665  unsigned NumShuffles = NumReduxLevels;
1666  if (IsPairwise && NumReduxLevels >= 1)
1667  NumShuffles += NumReduxLevels - 1;
1668  ShuffleCost += NumShuffles *
1669  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1670  0, Ty);
1671  MinMaxCost +=
1672  NumReduxLevels *
1673  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1674  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1675  nullptr));
1676  // The last min/max is produced in a vector register and was counted above,
1677  // so we just need a single extractelement.
1678  return ShuffleCost + MinMaxCost +
1679  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1680  }
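As a worked illustration of the composition just computed (with assumed unit costs, not any target's real numbers): a <4 x i32> signed-max reduction on a target where <4 x i32> is already legal has log2(4) = 2 reduction levels, each priced as one shuffle plus one icmp plus one select, followed by a single extractelement, so the estimate is 2 * (1 + 1 + 1) + 1 = 7 cost units under those assumptions.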
1681 
1682  unsigned getVectorSplitCost() { return 1; }
1683 
1684  /// @}
1685 };
1686 
1687 /// Concrete BasicTTIImpl that can be used if no further customization
1688 /// is needed.
1689 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1690  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1691 
1692  friend class BasicTTIImplBase<BasicTTIImpl>;
1693 
1694  const TargetSubtargetInfo *ST;
1695  const TargetLoweringBase *TLI;
1696 
1697  const TargetSubtargetInfo *getST() const { return ST; }
1698  const TargetLoweringBase *getTLI() const { return TLI; }
1699 
1700 public:
1701  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1702 };
1703 
1704 } // end namespace llvm
1705 
1706 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
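For context on how the base class is meant to be reused, here is a purely hypothetical sketch of a target-specific TTI built on BasicTTIImplBase, mirroring the shape of the concrete BasicTTIImpl above; every MyTarget* name is invented, and the constructor only follows the common pattern rather than any specific backend.

#include "llvm/CodeGen/BasicTTIImpl.h"

// Hypothetical sketch only; MyTargetSubtarget, MyTargetTargetLowering and
// MyTargetTargetMachine do not exist and stand in for a real backend's types.
class MyTargetTTIImpl : public llvm::BasicTTIImplBase<MyTargetTTIImpl> {
  using BaseT = llvm::BasicTTIImplBase<MyTargetTTIImpl>;
  friend BaseT;

  const MyTargetSubtarget *ST;
  const MyTargetTargetLowering *TLI;

  // Accessors the CRTP base calls back into.
  const MyTargetSubtarget *getST() const { return ST; }
  const MyTargetTargetLowering *getTLI() const { return TLI; }

public:
  explicit MyTargetTTIImpl(const MyTargetTargetMachine *TM, const llvm::Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
};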