1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides a helper that implements much of the TTI interface in
11 /// terms of the target-independent code generator and TargetLowering
12 /// interfaces.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
18 
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Analysis/TargetTransformInfoImpl.h"
27 #include "llvm/CodeGen/ISDOpcodes.h"
28 #include "llvm/CodeGen/TargetLowering.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallSite.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Operator.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/MC/MCSchedule.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <limits>
54 #include <utility>
55 
56 namespace llvm {
57 
58 class Function;
59 class GlobalValue;
60 class LLVMContext;
61 class ScalarEvolution;
62 class SCEV;
63 class TargetMachine;
64 
65 extern cl::opt<unsigned> PartialUnrollingThreshold;
66 
67 /// Base class which can be used to help build a TTI implementation.
68 ///
69 /// This class provides as much implementation of the TTI interface as is
70 /// possible using the target independent parts of the code generator.
71 ///
72 /// In order to subclass it, your class must implement a getST() method to
73 /// return the subtarget, and a getTLI() method to return the target lowering.
74 /// We need these methods implemented in the derived class so that this class
75 /// doesn't have to duplicate storage for them.
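///
/// A minimal subclassing sketch (illustrative only; the target class names
/// below are hypothetical and not part of LLVM):
/// \code
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     const MyTargetSubtarget *ST;
///     const MyTargetLowering *TLI;
///
///   public:
///     const MyTargetSubtarget *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };
/// \endcode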
76 template <typename T>
77 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78 private:
79  using BaseT = TargetTransformInfoImplCRTPBase<T>;
80  using TTI = TargetTransformInfo;
81 
82  /// Estimate a cost of Broadcast as an extract and sequence of insert
83  /// operations.
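 /// For example, broadcasting a <4 x i32> is modelled as one extract of
 /// element 0 plus four inserts; with a per-element insert/extract cost of 1,
 /// this helper returns 5.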
84  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85  assert(Ty->isVectorTy() && "Can only shuffle vectors");
86  unsigned Cost = 0;
87  // Broadcast cost is equal to the cost of extracting the zero'th element
88  // plus the cost of inserting it into every element of the result vector.
89  Cost += static_cast<T *>(this)->getVectorInstrCost(
90  Instruction::ExtractElement, Ty, 0);
91 
92  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93  Cost += static_cast<T *>(this)->getVectorInstrCost(
94  Instruction::InsertElement, Ty, i);
95  }
96  return Cost;
97  }
98 
99  /// Estimate a cost of shuffle as a sequence of extract and insert
100  /// operations.
101  unsigned getPermuteShuffleOverhead(Type *Ty) {
102  assert(Ty->isVectorTy() && "Can only shuffle vectors");
103  unsigned Cost = 0;
104  // Shuffle cost is equal to the cost of extracting element from its argument
105  // plus the cost of inserting them onto the result vector.
106 
107  // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from
108  // index 0 of first vector, index 1 of second vector,index 2 of first
109  // vector and finally index 3 of second vector and insert them at index
110  // <0,1,2,3> of result vector.
111  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112  Cost += static_cast<T *>(this)
113  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114  Cost += static_cast<T *>(this)
115  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116  }
117  return Cost;
118  }
119 
120  /// Estimate a cost of subvector extraction as a sequence of extract and
121  /// insert operations.
122  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124  "Can only extract subvectors from vectors");
125  int NumSubElts = SubTy->getVectorNumElements();
126  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127  "SK_ExtractSubvector index out of range");
128 
129  unsigned Cost = 0;
130  // Subvector extraction cost is equal to the cost of extracting element from
131  // the source type plus the cost of inserting them into the result vector
132  // type.
133  for (int i = 0; i != NumSubElts; ++i) {
134  Cost += static_cast<T *>(this)->getVectorInstrCost(
135  Instruction::ExtractElement, Ty, i + Index);
136  Cost += static_cast<T *>(this)->getVectorInstrCost(
137  Instruction::InsertElement, SubTy, i);
138  }
139  return Cost;
140  }
141 
142  /// Estimate a cost of subvector insertion as a sequence of extract and
143  /// insert operations.
144  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146  "Can only insert subvectors into vectors");
147  int NumSubElts = SubTy->getVectorNumElements();
148  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149  "SK_InsertSubvector index out of range");
150 
151  unsigned Cost = 0;
152  // Subvector insertion cost is equal to the cost of extracting element from
153  // the source type plus the cost of inserting them into the result vector
154  // type.
155  for (int i = 0; i != NumSubElts; ++i) {
156  Cost += static_cast<T *>(this)->getVectorInstrCost(
157  Instruction::ExtractElement, SubTy, i);
158  Cost += static_cast<T *>(this)->getVectorInstrCost(
159  Instruction::InsertElement, Ty, i + Index);
160  }
161  return Cost;
162  }
163 
164  /// Local query method delegates up to T which *must* implement this!
165  const TargetSubtargetInfo *getST() const {
166  return static_cast<const T *>(this)->getST();
167  }
168 
169  /// Local query method delegates up to T which *must* implement this!
170  const TargetLoweringBase *getTLI() const {
171  return static_cast<const T *>(this)->getTLI();
172  }
173 
174  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175  switch (M) {
176  case TTI::MIM_Unindexed:
177  return ISD::UNINDEXED;
178  case TTI::MIM_PreInc:
179  return ISD::PRE_INC;
180  case TTI::MIM_PreDec:
181  return ISD::PRE_DEC;
182  case TTI::MIM_PostInc:
183  return ISD::POST_INC;
184  case TTI::MIM_PostDec:
185  return ISD::POST_DEC;
186  }
187  llvm_unreachable("Unexpected MemIndexedMode");
188  }
189 
190 protected:
191  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192  : BaseT(DL) {}
193 
195 
196 public:
197  /// \name Scalar TTI Implementations
198  /// @{
199  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
200  unsigned AddressSpace, unsigned Alignment,
201  bool *Fast) const {
202  EVT E = EVT::getIntegerVT(Context, BitWidth);
203  return getTLI()->allowsMisalignedMemoryAccesses(
204  E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205  }
206 
207  bool hasBranchDivergence() { return false; }
208 
209  bool isSourceOfDivergence(const Value *V) { return false; }
210 
211  bool isAlwaysUniform(const Value *V) { return false; }
212 
213  unsigned getFlatAddressSpace() {
214  // Return an invalid address space.
215  return -1;
216  }
217 
218  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
219  Intrinsic::ID IID) const {
220  return false;
221  }
222 
223  bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
224  Value *OldV, Value *NewV) const {
225  return false;
226  }
227 
228  bool isLegalAddImmediate(int64_t imm) {
229  return getTLI()->isLegalAddImmediate(imm);
230  }
231 
232  bool isLegalICmpImmediate(int64_t imm) {
233  return getTLI()->isLegalICmpImmediate(imm);
234  }
235 
236  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
237  bool HasBaseReg, int64_t Scale,
238  unsigned AddrSpace, Instruction *I = nullptr) {
239  TargetLoweringBase::AddrMode AM;
240  AM.BaseGV = BaseGV;
241  AM.BaseOffs = BaseOffset;
242  AM.HasBaseReg = HasBaseReg;
243  AM.Scale = Scale;
244  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
245  }
246 
247  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
248  const DataLayout &DL) const {
249  EVT VT = getTLI()->getValueType(DL, Ty);
250  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
251  }
252 
253  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
254  const DataLayout &DL) const {
255  EVT VT = getTLI()->getValueType(DL, Ty);
256  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
257  }
258 
259  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
260  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
261  }
262 
263  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
264  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
265  TargetLoweringBase::AddrMode AM;
266  AM.BaseGV = BaseGV;
267  AM.BaseOffs = BaseOffset;
268  AM.HasBaseReg = HasBaseReg;
269  AM.Scale = Scale;
270  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
271  }
272 
273  bool isTruncateFree(Type *Ty1, Type *Ty2) {
274  return getTLI()->isTruncateFree(Ty1, Ty2);
275  }
276 
277  bool isProfitableToHoist(Instruction *I) {
278  return getTLI()->isProfitableToHoist(I);
279  }
280 
281  bool useAA() const { return getST()->useAA(); }
282 
283  bool isTypeLegal(Type *Ty) {
284  EVT VT = getTLI()->getValueType(DL, Ty);
285  return getTLI()->isTypeLegal(VT);
286  }
287 
288  int getGEPCost(Type *PointeeType, const Value *Ptr,
289  ArrayRef<const Value *> Operands) {
290  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
291  }
292 
293  int getExtCost(const Instruction *I, const Value *Src) {
294  if (getTLI()->isExtFree(I))
295  return TargetTransformInfo::TCC_Free;
296 
297  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
298  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
299  if (getTLI()->isExtLoad(LI, I, DL))
300  return TargetTransformInfo::TCC_Free;
301 
302  return TargetTransformInfo::TCC_Basic;
303  }
304 
305  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
306  ArrayRef<const Value *> Arguments, const User *U) {
307  return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
308  }
309 
310  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
311  ArrayRef<Type *> ParamTys, const User *U) {
312  if (IID == Intrinsic::cttz) {
313  if (getTLI()->isCheapToSpeculateCttz())
314  return TargetTransformInfo::TCC_Basic;
315  return TargetTransformInfo::TCC_Expensive;
316  }
317 
318  if (IID == Intrinsic::ctlz) {
319  if (getTLI()->isCheapToSpeculateCtlz())
320  return TargetTransformInfo::TCC_Basic;
321  return TargetTransformInfo::TCC_Expensive;
322  }
323 
324  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
325  }
326 
327  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
328  unsigned &JumpTableSize) {
329  /// Try to find the estimated number of clusters. Note that the number of
330  /// clusters identified in this function could be different from the actual
331  /// numbers found in lowering. This function ignore switches that are
332  /// lowered with a mix of jump table / bit test / BTree. This function was
333  /// initially intended to be used when estimating the cost of switch in
334  /// inline cost heuristic, but it's a generic cost model to be used in other
335  /// places (e.g., in loop unrolling).
336  unsigned N = SI.getNumCases();
337  const TargetLoweringBase *TLI = getTLI();
338  const DataLayout &DL = this->getDataLayout();
339 
340  JumpTableSize = 0;
341  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
342 
343  // Early exit if both a jump table and bit test are not allowed.
344  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
345  return N;
346 
347  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
348  APInt MinCaseVal = MaxCaseVal;
349  for (auto CI : SI.cases()) {
350  const APInt &CaseVal = CI.getCaseValue()->getValue();
351  if (CaseVal.sgt(MaxCaseVal))
352  MaxCaseVal = CaseVal;
353  if (CaseVal.slt(MinCaseVal))
354  MinCaseVal = CaseVal;
355  }
356 
357  // Check if suitable for a bit test
358  if (N <= DL.getIndexSizeInBits(0u)) {
359  SmallPtrSet<const BasicBlock *, 4> Dests;
360  for (auto I : SI.cases())
361  Dests.insert(I.getCaseSuccessor());
362 
363  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
364  DL))
365  return 1;
366  }
367 
368  // Check if suitable for a jump table.
369  if (IsJTAllowed) {
370  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
371  return N;
372  uint64_t Range =
373  (MaxCaseVal - MinCaseVal)
374  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
375  // Check whether a range of clusters is dense enough for a jump table
376  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
377  JumpTableSize = Range;
378  return 1;
379  }
380  }
381  return N;
382  }
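  // For example (illustrative), a switch over the dense case values
  // {0, 1, 2, 3} with distinct successors is typically jump-table friendly:
  // JumpTableSize is set to 4 (the case-value range) and 1 cluster is
  // returned. A sparse switch such as {0, 1000, 2000000} falls through to
  // returning the raw case count. The exact outcome depends on the target's
  // jump-table and bit-test thresholds.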
383 
384  bool shouldBuildLookupTables() {
385  const TargetLoweringBase *TLI = getTLI();
386  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
387  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
388  }
389 
390  bool haveFastSqrt(Type *Ty) {
391  const TargetLoweringBase *TLI = getTLI();
392  EVT VT = TLI->getValueType(DL, Ty);
393  return TLI->isTypeLegal(VT) &&
394  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
395  }
396 
397  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
398  return true;
399  }
400 
401  unsigned getFPOpCost(Type *Ty) {
402  // Check whether FADD is available, as a proxy for floating-point in
403  // general.
404  const TargetLoweringBase *TLI = getTLI();
405  EVT VT = TLI->getValueType(DL, Ty);
406  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
407  return TargetTransformInfo::TCC_Basic;
408  return TargetTransformInfo::TCC_Expensive;
409  }
410 
411  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
412  const TargetLoweringBase *TLI = getTLI();
413  switch (Opcode) {
414  default: break;
415  case Instruction::Trunc:
416  if (TLI->isTruncateFree(OpTy, Ty))
417  return TargetTransformInfo::TCC_Free;
418  break;
419  case Instruction::ZExt:
420  if (TLI->isZExtFree(OpTy, Ty))
421  return TargetTransformInfo::TCC_Free;
422  break;
423 
424  case Instruction::AddrSpaceCast:
425  if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
426  Ty->getPointerAddressSpace()))
427  return TargetTransformInfo::TCC_Free;
428  return TargetTransformInfo::TCC_Basic;
429  }
430 
431  return BaseT::getOperationCost(Opcode, Ty, OpTy);
432  }
433 
434  unsigned getInliningThresholdMultiplier() { return 1; }
435 
436  int getInlinerVectorBonusPercent() { return 150; }
437 
438  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
439  TTI::UnrollingPreferences &UP) {
440  // This unrolling functionality is target independent, but to provide some
441  // motivation for its intended use, for x86:
442 
443  // According to the Intel 64 and IA-32 Architectures Optimization Reference
444  // Manual, Intel Core models and later have a loop stream detector (and
445  // associated uop queue) that can benefit from partial unrolling.
446  // The relevant requirements are:
447  // - The loop must have no more than 4 (8 for Nehalem and later) branches
448  // taken, and none of them may be calls.
449  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
450 
451  // According to the Software Optimization Guide for AMD Family 15h
452  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
453  // and loop buffer which can benefit from partial unrolling.
454  // The relevant requirements are:
455  // - The loop must have fewer than 16 branches
456  // - The loop must have less than 40 uops in all executed loop branches
457 
458  // The number of taken branches in a loop is hard to estimate here, and
459  // benchmarking has revealed that it is better not to be conservative when
460  // estimating the branch count. As a result, we'll ignore the branch limits
461  // until someone finds a case where it matters in practice.
462 
463  unsigned MaxOps;
464  const TargetSubtargetInfo *ST = getST();
465  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
466  MaxOps = PartialUnrollingThreshold;
467  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
468  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
469  else
470  return;
471 
472  // Scan the loop: don't unroll loops with calls.
473  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
474  ++I) {
475  BasicBlock *BB = *I;
476 
477  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
478  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
479  ImmutableCallSite CS(&*J);
480  if (const Function *F = CS.getCalledFunction()) {
481  if (!static_cast<T *>(this)->isLoweredToCall(F))
482  continue;
483  }
484 
485  return;
486  }
487  }
488 
489  // Enable runtime and partial unrolling up to the specified size.
490  // Enable using trip count upper bound to unroll loops.
491  UP.Partial = UP.Runtime = UP.UpperBound = true;
492  UP.PartialThreshold = MaxOps;
493 
494  // Avoid unrolling when optimizing for size.
495  UP.OptSizeThreshold = 0;
496  UP.PartialOptSizeThreshold = 0;
497 
498  // Set number of instructions optimized when "back edge"
499  // becomes "fall through" to default value of 2.
500  UP.BEInsns = 2;
501  }
502 
503  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
504  AssumptionCache &AC,
505  TargetLibraryInfo *LibInfo,
506  HardwareLoopInfo &HWLoopInfo) {
507  return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
508  }
509 
510  unsigned getInstructionLatency(const Instruction *I) {
511  if (isa<LoadInst>(I))
512  return getST()->getSchedModel().DefaultLoadLatency;
513 
514  return BaseT::getInstructionLatency(I);
515  }
516 
517  /// @}
518 
519  /// \name Vector TTI Implementations
520  /// @{
521 
522  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
523 
524  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
525 
526  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
527  /// are set if the result needs to be inserted and/or extracted from vectors.
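 /// For example, scalarizing a <4 x float> operation with both Insert and
 /// Extract set is modelled as 4 inserts plus 4 extracts, i.e. 8 element
 /// operations at whatever per-element cost the target reports.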
528  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
529  assert(Ty->isVectorTy() && "Can only scalarize vectors");
530  unsigned Cost = 0;
531 
532  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
533  if (Insert)
534  Cost += static_cast<T *>(this)
535  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
536  if (Extract)
537  Cost += static_cast<T *>(this)
538  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
539  }
540 
541  return Cost;
542  }
543 
544  /// Estimate the overhead of scalarizing an instructions unique
545  /// non-constant operands. The types of the arguments are ordinarily
546  /// scalar, in which case the costs are multiplied with VF.
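 /// For example, with VF = 4 and two unique non-constant scalar arguments,
 /// the estimate is the cost of extracting every element of two <4 x ...>
 /// vectors; constants and repeated operands add nothing.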
547  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
548  unsigned VF) {
549  unsigned Cost = 0;
550  SmallPtrSet<const Value*, 4> UniqueOperands;
551  for (const Value *A : Args) {
552  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
553  Type *VecTy = nullptr;
554  if (A->getType()->isVectorTy()) {
555  VecTy = A->getType();
556  // If A is a vector operand, VF should be 1 or correspond to A.
557  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
558  "Vector argument does not match VF");
559  }
560  else
561  VecTy = VectorType::get(A->getType(), VF);
562 
563  Cost += getScalarizationOverhead(VecTy, false, true);
564  }
565  }
566 
567  return Cost;
568  }
569 
570  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
571  assert(VecTy->isVectorTy());
572 
573  unsigned Cost = 0;
574 
575  Cost += getScalarizationOverhead(VecTy, true, false);
576  if (!Args.empty())
577  Cost += getOperandsScalarizationOverhead(Args,
578  VecTy->getVectorNumElements());
579  else
580  // When no information on arguments is provided, we add the cost
581  // associated with one argument as a heuristic.
582  Cost += getScalarizationOverhead(VecTy, false, true);
583 
584  return Cost;
585  }
586 
587  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
588 
589  unsigned getArithmeticInstrCost(
590  unsigned Opcode, Type *Ty,
591  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
592  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
593  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
594  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
595  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
596  // Check if any of the operands are vector operands.
597  const TargetLoweringBase *TLI = getTLI();
598  int ISD = TLI->InstructionOpcodeToISD(Opcode);
599  assert(ISD && "Invalid opcode");
600 
601  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
602 
603  bool IsFloat = Ty->isFPOrFPVectorTy();
604  // Assume that floating point arithmetic operations cost twice as much as
605  // integer operations.
606  unsigned OpCost = (IsFloat ? 2 : 1);
607 
608  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
609  // The operation is legal. Assume it costs 1.
610  // TODO: Once we have extract/insert subvector cost we need to use them.
611  return LT.first * OpCost;
612  }
613 
614  if (!TLI->isOperationExpand(ISD, LT.second)) {
615  // If the operation is custom lowered, then assume that the code is twice
616  // as expensive.
617  return LT.first * 2 * OpCost;
618  }
619 
620  // Else, assume that we need to scalarize this op.
621  // TODO: If one of the types get legalized by splitting, handle this
622  // similarly to what getCastInstrCost() does.
623  if (Ty->isVectorTy()) {
624  unsigned Num = Ty->getVectorNumElements();
625  unsigned Cost = static_cast<T *>(this)
626  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
627  // Return the cost of multiple scalar invocation plus the cost of
628  // inserting and extracting the values.
629  return getScalarizationOverhead(Ty, Args) + Num * Cost;
630  }
631 
632  // We don't know anything about this scalar instruction.
633  return OpCost;
634  }
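  // For example (illustrative): if a <4 x float> multiply is legal after type
  // legalization, the estimate above is LT.first * 2 (the floating-point
  // OpCost); if the operation must be expanded instead, it becomes four
  // scalar multiplies plus the insert/extract overhead.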
635 
636  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
637  Type *SubTp) {
638  switch (Kind) {
639  case TTI::SK_Broadcast:
640  return getBroadcastShuffleOverhead(Tp);
641  case TTI::SK_Select:
642  case TTI::SK_Reverse:
643  case TTI::SK_Transpose:
644  case TTI::SK_PermuteSingleSrc:
645  case TTI::SK_PermuteTwoSrc:
646  return getPermuteShuffleOverhead(Tp);
647  case TTI::SK_ExtractSubvector:
648  return getExtractSubvectorOverhead(Tp, Index, SubTp);
649  case TTI::SK_InsertSubvector:
650  return getInsertSubvectorOverhead(Tp, Index, SubTp);
651  }
652  llvm_unreachable("Unknown TTI::ShuffleKind");
653  }
654 
655  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
656  const Instruction *I = nullptr) {
657  const TargetLoweringBase *TLI = getTLI();
658  int ISD = TLI->InstructionOpcodeToISD(Opcode);
659  assert(ISD && "Invalid opcode");
660  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
661  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
662 
663  // Check for NOOP conversions.
664  if (SrcLT.first == DstLT.first &&
665  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
666 
667  // Bitcast between types that are legalized to the same type are free.
668  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
669  return 0;
670  }
671 
672  if (Opcode == Instruction::Trunc &&
673  TLI->isTruncateFree(SrcLT.second, DstLT.second))
674  return 0;
675 
676  if (Opcode == Instruction::ZExt &&
677  TLI->isZExtFree(SrcLT.second, DstLT.second))
678  return 0;
679 
680  if (Opcode == Instruction::AddrSpaceCast &&
681  TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
682  Dst->getPointerAddressSpace()))
683  return 0;
684 
685  // If this is a zext/sext of a load, return 0 if the corresponding
686  // extending load exists on target.
687  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
688  I && isa<LoadInst>(I->getOperand(0))) {
689  EVT ExtVT = EVT::getEVT(Dst);
690  EVT LoadVT = EVT::getEVT(Src);
691  unsigned LType =
692  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
693  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
694  return 0;
695  }
696 
697  // If the cast is marked as legal (or promote) then assume low cost.
698  if (SrcLT.first == DstLT.first &&
699  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
700  return 1;
701 
702  // Handle scalar conversions.
703  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
704  // Scalar bitcasts are usually free.
705  if (Opcode == Instruction::BitCast)
706  return 0;
707 
708  // Just check the op cost. If the operation is legal then assume it costs
709  // 1.
710  if (!TLI->isOperationExpand(ISD, DstLT.second))
711  return 1;
712 
713  // Assume that illegal scalar instruction are expensive.
714  return 4;
715  }
716 
717  // Check vector-to-vector casts.
718  if (Dst->isVectorTy() && Src->isVectorTy()) {
719  // If the cast is between same-sized registers, then the check is simple.
720  if (SrcLT.first == DstLT.first &&
721  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
722 
723  // Assume that Zext is done using AND.
724  if (Opcode == Instruction::ZExt)
725  return 1;
726 
727  // Assume that sext is done using SHL and SRA.
728  if (Opcode == Instruction::SExt)
729  return 2;
730 
731  // Just check the op cost. If the operation is legal then assume it
732  // costs
733  // 1 and multiply by the type-legalization overhead.
734  if (!TLI->isOperationExpand(ISD, DstLT.second))
735  return SrcLT.first * 1;
736  }
737 
738  // If we are legalizing by splitting, query the concrete TTI for the cost
739  // of casting the original vector twice. We also need to factor in the
740  // cost of the split itself. Count that as 1, to be consistent with
741  // TLI->getTypeLegalizationCost().
742  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
743  TargetLowering::TypeSplitVector) ||
744  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
745  TargetLowering::TypeSplitVector)) {
746  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
747  Dst->getVectorNumElements() / 2);
748  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
749  Src->getVectorNumElements() / 2);
750  T *TTI = static_cast<T *>(this);
751  return TTI->getVectorSplitCost() +
752  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
753  }
754 
755  // In other cases where the source or destination are illegal, assume
756  // the operation will get scalarized.
757  unsigned Num = Dst->getVectorNumElements();
758  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
759  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
760 
761  // Return the cost of multiple scalar invocation plus the cost of
762  // inserting and extracting the values.
763  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
764  }
765 
766  // We already handled vector-to-vector and scalar-to-scalar conversions.
767  // This
768  // is where we handle bitcast between vectors and scalars. We need to assume
769  // that the conversion is scalarized in one way or another.
770  if (Opcode == Instruction::BitCast)
771  // Illegal bitcasts are done by storing and loading from a stack slot.
772  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
773  : 0) +
774  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
775  : 0);
776 
777  llvm_unreachable("Unhandled cast");
778  }
779 
780  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
781  VectorType *VecTy, unsigned Index) {
782  return static_cast<T *>(this)->getVectorInstrCost(
783  Instruction::ExtractElement, VecTy, Index) +
784  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
785  VecTy->getElementType());
786  }
787 
788  unsigned getCFInstrCost(unsigned Opcode) {
789  // Branches are assumed to be predicted.
790  return 0;
791  }
792 
793  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
794  const Instruction *I) {
795  const TargetLoweringBase *TLI = getTLI();
796  int ISD = TLI->InstructionOpcodeToISD(Opcode);
797  assert(ISD && "Invalid opcode");
798 
799  // Selects on vectors are actually vector selects.
800  if (ISD == ISD::SELECT) {
801  assert(CondTy && "CondTy must exist");
802  if (CondTy->isVectorTy())
803  ISD = ISD::VSELECT;
804  }
805  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
806 
807  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
808  !TLI->isOperationExpand(ISD, LT.second)) {
809  // The operation is legal. Assume it costs 1. Multiply
810  // by the type-legalization overhead.
811  return LT.first * 1;
812  }
813 
814  // Otherwise, assume that the cast is scalarized.
815  // TODO: If one of the types get legalized by splitting, handle this
816  // similarly to what getCastInstrCost() does.
817  if (ValTy->isVectorTy()) {
818  unsigned Num = ValTy->getVectorNumElements();
819  if (CondTy)
820  CondTy = CondTy->getScalarType();
821  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
822  Opcode, ValTy->getScalarType(), CondTy, I);
823 
824  // Return the cost of multiple scalar invocation plus the cost of
825  // inserting and extracting the values.
826  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
827  }
828 
829  // Unknown scalar opcode.
830  return 1;
831  }
832 
833  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
834  std::pair<unsigned, MVT> LT =
835  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
836 
837  return LT.first;
838  }
839 
840  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
841  unsigned AddressSpace, const Instruction *I = nullptr) {
842  assert(!Src->isVoidTy() && "Invalid type");
843  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
844 
845  // Assuming that all loads of legal types cost 1.
846  unsigned Cost = LT.first;
847 
848  if (Src->isVectorTy() &&
849  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
850  // This is a vector load that legalizes to a larger type than the vector
851  // itself. Unless the corresponding extending load or truncating store is
852  // legal, then this will scalarize.
853  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
854  EVT MemVT = getTLI()->getValueType(DL, Src);
855  if (Opcode == Instruction::Store)
856  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
857  else
858  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
859 
860  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
861  // This is a vector load/store for some illegal type that is scalarized.
862  // We must account for the cost of building or decomposing the vector.
863  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
864  Opcode == Instruction::Store);
865  }
866  }
867 
868  return Cost;
869  }
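  // For example (illustrative): a load of <4 x i8> that legalizes to a wider
  // vector type keeps the cost LT.first only if the corresponding extending
  // load is Legal or Custom; otherwise the code above also charges for
  // inserting the four loaded elements into the result vector.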
870 
871  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
872  unsigned Factor,
873  ArrayRef<unsigned> Indices,
874  unsigned Alignment, unsigned AddressSpace,
875  bool UseMaskForCond = false,
876  bool UseMaskForGaps = false) {
877  VectorType *VT = dyn_cast<VectorType>(VecTy);
878  assert(VT && "Expect a vector type for interleaved memory op");
879 
880  unsigned NumElts = VT->getNumElements();
881  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
882 
883  unsigned NumSubElts = NumElts / Factor;
884  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
885 
886  // Firstly, the cost of load/store operation.
887  unsigned Cost;
888  if (UseMaskForCond || UseMaskForGaps)
889  Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
890  Opcode, VecTy, Alignment, AddressSpace);
891  else
892  Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
893  AddressSpace);
894 
895  // Legalize the vector type, and get the legalized and unlegalized type
896  // sizes.
897  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
898  unsigned VecTySize =
899  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
900  unsigned VecTyLTSize = VecTyLT.getStoreSize();
901 
902  // Return the ceiling of dividing A by B.
903  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
904 
905  // Scale the cost of the memory operation by the fraction of legalized
906  // instructions that will actually be used. We shouldn't account for the
907  // cost of dead instructions since they will be removed.
908  //
909  // E.g., An interleaved load of factor 8:
910  // %vec = load <16 x i64>, <16 x i64>* %ptr
911  // %v0 = shufflevector %vec, undef, <0, 8>
912  //
913  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
914  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
915  // type). The other loads are unused.
916  //
917  // We only scale the cost of loads since interleaved store groups aren't
918  // allowed to have gaps.
919  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
920  // The number of loads of a legal type it will take to represent a load
921  // of the unlegalized vector type.
922  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
923 
924  // The number of elements of the unlegalized type that correspond to a
925  // single legal instruction.
926  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
927 
928  // Determine which legal instructions will be used.
929  BitVector UsedInsts(NumLegalInsts, false);
930  for (unsigned Index : Indices)
931  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
932  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
933 
934  // Scale the cost of the load by the fraction of legal instructions that
935  // will be used.
936  Cost *= UsedInsts.count() / NumLegalInsts;
937  }
938 
939  // Then plus the cost of interleave operation.
940  if (Opcode == Instruction::Load) {
941  // The interleave cost is similar to extract sub vectors' elements
942  // from the wide vector, and insert them into sub vectors.
943  //
944  // E.g. An interleaved load of factor 2 (with one member of index 0):
945  // %vec = load <8 x i32>, <8 x i32>* %ptr
946  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
947  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
948  // <8 x i32> vector and insert them into a <4 x i32> vector.
949 
950  assert(Indices.size() <= Factor &&
951  "Interleaved memory op has too many members");
952 
953  for (unsigned Index : Indices) {
954  assert(Index < Factor && "Invalid index for interleaved memory op");
955 
956  // Extract elements from loaded vector for each sub vector.
957  for (unsigned i = 0; i < NumSubElts; i++)
958  Cost += static_cast<T *>(this)->getVectorInstrCost(
959  Instruction::ExtractElement, VT, Index + i * Factor);
960  }
961 
962  unsigned InsSubCost = 0;
963  for (unsigned i = 0; i < NumSubElts; i++)
964  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
965  Instruction::InsertElement, SubVT, i);
966 
967  Cost += Indices.size() * InsSubCost;
968  } else {
969  // The interleave cost is extract all elements from sub vectors, and
970  // insert them into the wide vector.
971  //
972  // E.g. An interleaved store of factor 2:
973  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
974  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
975  // The cost is estimated as extract all elements from both <4 x i32>
976  // vectors and insert into the <8 x i32> vector.
977 
978  unsigned ExtSubCost = 0;
979  for (unsigned i = 0; i < NumSubElts; i++)
980  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
981  Instruction::ExtractElement, SubVT, i);
982  Cost += ExtSubCost * Factor;
983 
984  for (unsigned i = 0; i < NumElts; i++)
985  Cost += static_cast<T *>(this)
986  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
987  }
988 
989  if (!UseMaskForCond)
990  return Cost;
991 
992  Type *I8Type = Type::getInt8Ty(VT->getContext());
993  VectorType *MaskVT = VectorType::get(I8Type, NumElts);
994  SubVT = VectorType::get(I8Type, NumSubElts);
995 
996  // The Mask shuffling cost is extract all the elements of the Mask
997  // and insert each of them Factor times into the wide vector:
998  //
999  // E.g. an interleaved group with factor 3:
1000  // %mask = icmp ult <8 x i32> %vec1, %vec2
1001  // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1002  // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1003  // The cost is estimated as extract all mask elements from the <8xi1> mask
1004  // vector and insert them factor times into the <24xi1> shuffled mask
1005  // vector.
1006  for (unsigned i = 0; i < NumSubElts; i++)
1007  Cost += static_cast<T *>(this)->getVectorInstrCost(
1008  Instruction::ExtractElement, SubVT, i);
1009 
1010  for (unsigned i = 0; i < NumElts; i++)
1011  Cost += static_cast<T *>(this)->getVectorInstrCost(
1012  Instruction::InsertElement, MaskVT, i);
1013 
1014  // The Gaps mask is invariant and created outside the loop, therefore the
1015  // cost of creating it is not accounted for here. However if we have both
1016  // a MaskForGaps and some other mask that guards the execution of the
1017  // memory access, we need to account for the cost of And-ing the two masks
1018  // inside the loop.
1019  if (UseMaskForGaps)
1020  Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1021  BinaryOperator::And, MaskVT);
1022 
1023  return Cost;
1024  }
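  // For example (illustrative): an interleaved load of factor 2 from
  // <8 x i32> with one member used (Indices = {0}) is estimated as the
  // <8 x i32> memory-op cost, plus 4 extracts from the wide vector, plus 4
  // inserts into the <4 x i32> sub-vector, before any mask handling.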
1025 
1026  /// Get intrinsic cost based on arguments.
1027  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1028  ArrayRef<Value *> Args, FastMathFlags FMF,
1029  unsigned VF = 1) {
1030  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1031  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1032  auto *ConcreteTTI = static_cast<T *>(this);
1033 
1034  switch (IID) {
1035  default: {
1036  // Assume that we need to scalarize this intrinsic.
1037  SmallVector<Type *, 4> Types;
1038  for (Value *Op : Args) {
1039  Type *OpTy = Op->getType();
1040  assert(VF == 1 || !OpTy->isVectorTy());
1041  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1042  }
1043 
1044  if (VF > 1 && !RetTy->isVoidTy())
1045  RetTy = VectorType::get(RetTy, VF);
1046 
1047  // Compute the scalarization overhead based on Args for a vector
1048  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1049  // CostModel will pass a vector RetTy and VF is 1.
1050  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1051  if (RetVF > 1 || VF > 1) {
1052  ScalarizationCost = 0;
1053  if (!RetTy->isVoidTy())
1054  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1055  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1056  }
1057 
1058  return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1059  ScalarizationCost);
1060  }
1061  case Intrinsic::masked_scatter: {
1062  assert(VF == 1 && "Can't vectorize types here.");
1063  Value *Mask = Args[3];
1064  bool VarMask = !isa<Constant>(Mask);
1065  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1066  return ConcreteTTI->getGatherScatterOpCost(
1067  Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1068  }
1069  case Intrinsic::masked_gather: {
1070  assert(VF == 1 && "Can't vectorize types here.");
1071  Value *Mask = Args[2];
1072  bool VarMask = !isa<Constant>(Mask);
1073  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1074  return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1075  Args[0], VarMask, Alignment);
1076  }
1077  case Intrinsic::experimental_vector_reduce_add:
1078  case Intrinsic::experimental_vector_reduce_mul:
1079  case Intrinsic::experimental_vector_reduce_and:
1080  case Intrinsic::experimental_vector_reduce_or:
1081  case Intrinsic::experimental_vector_reduce_xor:
1082  case Intrinsic::experimental_vector_reduce_v2_fadd:
1083  case Intrinsic::experimental_vector_reduce_v2_fmul:
1084  case Intrinsic::experimental_vector_reduce_smax:
1085  case Intrinsic::experimental_vector_reduce_smin:
1086  case Intrinsic::experimental_vector_reduce_fmax:
1087  case Intrinsic::experimental_vector_reduce_fmin:
1088  case Intrinsic::experimental_vector_reduce_umax:
1089  case Intrinsic::experimental_vector_reduce_umin:
1090  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1091  case Intrinsic::fshl:
1092  case Intrinsic::fshr: {
1093  Value *X = Args[0];
1094  Value *Y = Args[1];
1095  Value *Z = Args[2];
1096  TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1097  TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1098  TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1099  TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1100  TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1101  OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1102  : TTI::OP_None;
1103  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1104  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1105  unsigned Cost = 0;
1106  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1107  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1108  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1109  OpKindX, OpKindZ, OpPropsX);
1110  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1111  OpKindY, OpKindZ, OpPropsY);
1112  // Non-constant shift amounts requires a modulo.
1113  if (OpKindZ != TTI::OK_UniformConstantValue &&
1114  OpKindZ != TTI::OK_NonUniformConstantValue)
1115  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1116  OpKindZ, OpKindBW, OpPropsZ,
1117  OpPropsBW);
1118  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1119  if (X != Y) {
1120  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1121  if (RetVF > 1)
1122  CondTy = VectorType::get(CondTy, RetVF);
1123  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1124  CondTy, nullptr);
1125  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1126  CondTy, nullptr);
1127  }
1128  return Cost;
1129  }
1130  }
1131  }
1132 
1133  /// Get intrinsic cost based on argument types.
1134  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1135  /// cost of scalarizing the arguments and the return value will be computed
1136  /// based on types.
1137  unsigned getIntrinsicInstrCost(
1138  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1139  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1140  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1141  auto *ConcreteTTI = static_cast<T *>(this);
1142 
1143  SmallVector<unsigned, 2> ISDs;
1144  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1145  switch (IID) {
1146  default: {
1147  // Assume that we need to scalarize this intrinsic.
1148  unsigned ScalarizationCost = ScalarizationCostPassed;
1149  unsigned ScalarCalls = 1;
1150  Type *ScalarRetTy = RetTy;
1151  if (RetTy->isVectorTy()) {
1152  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1153  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1154  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1155  ScalarRetTy = RetTy->getScalarType();
1156  }
1157  SmallVector<Type *, 4> ScalarTys;
1158  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1159  Type *Ty = Tys[i];
1160  if (Ty->isVectorTy()) {
1161  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1162  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1163  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1164  Ty = Ty->getScalarType();
1165  }
1166  ScalarTys.push_back(Ty);
1167  }
1168  if (ScalarCalls == 1)
1169  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1170 
1171  unsigned ScalarCost =
1172  ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1173 
1174  return ScalarCalls * ScalarCost + ScalarizationCost;
1175  }
1176  // Look for intrinsics that can be lowered directly or turned into a scalar
1177  // intrinsic call.
1178  case Intrinsic::sqrt:
1179  ISDs.push_back(ISD::FSQRT);
1180  break;
1181  case Intrinsic::sin:
1182  ISDs.push_back(ISD::FSIN);
1183  break;
1184  case Intrinsic::cos:
1185  ISDs.push_back(ISD::FCOS);
1186  break;
1187  case Intrinsic::exp:
1188  ISDs.push_back(ISD::FEXP);
1189  break;
1190  case Intrinsic::exp2:
1191  ISDs.push_back(ISD::FEXP2);
1192  break;
1193  case Intrinsic::log:
1194  ISDs.push_back(ISD::FLOG);
1195  break;
1196  case Intrinsic::log10:
1197  ISDs.push_back(ISD::FLOG10);
1198  break;
1199  case Intrinsic::log2:
1200  ISDs.push_back(ISD::FLOG2);
1201  break;
1202  case Intrinsic::fabs:
1203  ISDs.push_back(ISD::FABS);
1204  break;
1205  case Intrinsic::canonicalize:
1206  ISDs.push_back(ISD::FCANONICALIZE);
1207  break;
1208  case Intrinsic::minnum:
1209  ISDs.push_back(ISD::FMINNUM);
1210  if (FMF.noNaNs())
1211  ISDs.push_back(ISD::FMINIMUM);
1212  break;
1213  case Intrinsic::maxnum:
1214  ISDs.push_back(ISD::FMAXNUM);
1215  if (FMF.noNaNs())
1216  ISDs.push_back(ISD::FMAXIMUM);
1217  break;
1218  case Intrinsic::copysign:
1219  ISDs.push_back(ISD::FCOPYSIGN);
1220  break;
1221  case Intrinsic::floor:
1222  ISDs.push_back(ISD::FFLOOR);
1223  break;
1224  case Intrinsic::ceil:
1225  ISDs.push_back(ISD::FCEIL);
1226  break;
1227  case Intrinsic::trunc:
1228  ISDs.push_back(ISD::FTRUNC);
1229  break;
1230  case Intrinsic::nearbyint:
1231  ISDs.push_back(ISD::FNEARBYINT);
1232  break;
1233  case Intrinsic::rint:
1234  ISDs.push_back(ISD::FRINT);
1235  break;
1236  case Intrinsic::round:
1237  ISDs.push_back(ISD::FROUND);
1238  break;
1239  case Intrinsic::pow:
1240  ISDs.push_back(ISD::FPOW);
1241  break;
1242  case Intrinsic::fma:
1243  ISDs.push_back(ISD::FMA);
1244  break;
1245  case Intrinsic::fmuladd:
1246  ISDs.push_back(ISD::FMA);
1247  break;
1248  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1249  case Intrinsic::lifetime_start:
1250  case Intrinsic::lifetime_end:
1251  case Intrinsic::sideeffect:
1252  return 0;
1253  case Intrinsic::masked_store:
1254  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1255  0);
1256  case Intrinsic::masked_load:
1257  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1258  case Intrinsic::experimental_vector_reduce_add:
1259  return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1260  /*IsPairwiseForm=*/false);
1261  case Intrinsic::experimental_vector_reduce_mul:
1262  return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1263  /*IsPairwiseForm=*/false);
1264  case Intrinsic::experimental_vector_reduce_and:
1265  return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1266  /*IsPairwiseForm=*/false);
1267  case Intrinsic::experimental_vector_reduce_or:
1268  return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1269  /*IsPairwiseForm=*/false);
1270  case Intrinsic::experimental_vector_reduce_xor:
1271  return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1272  /*IsPairwiseForm=*/false);
1273  case Intrinsic::experimental_vector_reduce_v2_fadd:
1274  return ConcreteTTI->getArithmeticReductionCost(
1275  Instruction::FAdd, Tys[0],
1276  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1277  // reductions.
1278  case Intrinsic::experimental_vector_reduce_v2_fmul:
1279  return ConcreteTTI->getArithmeticReductionCost(
1280  Instruction::FMul, Tys[0],
1281  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1282  // reductions.
1283  case Intrinsic::experimental_vector_reduce_smax:
1284  case Intrinsic::experimental_vector_reduce_smin:
1285  case Intrinsic::experimental_vector_reduce_fmax:
1286  case Intrinsic::experimental_vector_reduce_fmin:
1287  return ConcreteTTI->getMinMaxReductionCost(
1288  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1289  /*IsUnsigned=*/true);
1290  case Intrinsic::experimental_vector_reduce_umax:
1291  case Intrinsic::experimental_vector_reduce_umin:
1292  return ConcreteTTI->getMinMaxReductionCost(
1293  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1294  /*IsUnsigned=*/false);
1295  case Intrinsic::sadd_sat:
1296  case Intrinsic::ssub_sat: {
1297  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1298  if (RetVF > 1)
1299  CondTy = VectorType::get(CondTy, RetVF);
1300 
1301  Type *OpTy = StructType::create({RetTy, CondTy});
1302  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1303  ? Intrinsic::sadd_with_overflow
1304  : Intrinsic::ssub_with_overflow;
1305 
1306  // SatMax -> Overflow && SumDiff < 0
1307  // SatMin -> Overflow && SumDiff >= 0
1308  unsigned Cost = 0;
1309  Cost += ConcreteTTI->getIntrinsicInstrCost(
1310  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1311  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1312  CondTy, nullptr);
1313  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1314  CondTy, nullptr);
1315  return Cost;
1316  }
1317  case Intrinsic::uadd_sat:
1318  case Intrinsic::usub_sat: {
1319  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1320  if (RetVF > 1)
1321  CondTy = VectorType::get(CondTy, RetVF);
1322 
1323  Type *OpTy = StructType::create({RetTy, CondTy});
1324  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1325  ? Intrinsic::uadd_with_overflow
1326  : Intrinsic::usub_with_overflow;
1327 
1328  unsigned Cost = 0;
1329  Cost += ConcreteTTI->getIntrinsicInstrCost(
1330  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1331  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1332  CondTy, nullptr);
1333  return Cost;
1334  }
1335  case Intrinsic::smul_fix:
1336  case Intrinsic::umul_fix: {
1337  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1338  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1339  if (RetVF > 1)
1340  ExtTy = VectorType::get(ExtTy, RetVF);
1341 
1342  unsigned ExtOp =
1343  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1344 
1345  unsigned Cost = 0;
1346  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1347  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1348  Cost +=
1349  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1350  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1351  TTI::OK_AnyValue,
1352  TTI::OK_UniformConstantValue);
1353  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1354  TTI::OK_AnyValue,
1355  TTI::OK_UniformConstantValue);
1356  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1357  return Cost;
1358  }
1359  case Intrinsic::sadd_with_overflow:
1360  case Intrinsic::ssub_with_overflow: {
1361  Type *SumTy = RetTy->getContainedType(0);
1362  Type *OverflowTy = RetTy->getContainedType(1);
1363  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1364  ? BinaryOperator::Add
1365  : BinaryOperator::Sub;
1366 
1367  // LHSSign -> LHS >= 0
1368  // RHSSign -> RHS >= 0
1369  // SumSign -> Sum >= 0
1370  //
1371  // Add:
1372  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1373  // Sub:
1374  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1375  unsigned Cost = 0;
1376  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1377  Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1378  OverflowTy, nullptr);
1379  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1380  BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1381  Cost +=
1382  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1383  return Cost;
1384  }
1385  case Intrinsic::uadd_with_overflow:
1386  case Intrinsic::usub_with_overflow: {
1387  Type *SumTy = RetTy->getContainedType(0);
1388  Type *OverflowTy = RetTy->getContainedType(1);
1389  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1390  ? BinaryOperator::Add
1391  : BinaryOperator::Sub;
1392 
1393  unsigned Cost = 0;
1394  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1395  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1396  OverflowTy, nullptr);
1397  return Cost;
1398  }
1399  case Intrinsic::smul_with_overflow:
1400  case Intrinsic::umul_with_overflow: {
1401  Type *MulTy = RetTy->getContainedType(0);
1402  Type *OverflowTy = RetTy->getContainedType(1);
1403  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1404  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1405  if (MulTy->isVectorTy())
1406  ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements() );
1407 
1408  unsigned ExtOp =
1409  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1410 
1411  unsigned Cost = 0;
1412  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1413  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1414  Cost +=
1415  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1416  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1417  TTI::OK_AnyValue,
1418  TTI::OK_UniformConstantValue);
1419 
1420  if (IID == Intrinsic::smul_with_overflow)
1421  Cost += ConcreteTTI->getArithmeticInstrCost(
1422  Instruction::AShr, MulTy, TTI::OK_AnyValue,
1423  TTI::OK_UniformConstantValue);
1424 
1425  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1426  OverflowTy, nullptr);
1427  return Cost;
1428  }
1429  case Intrinsic::ctpop:
1430  ISDs.push_back(ISD::CTPOP);
1431  // In case of legalization use TCC_Expensive. This is cheaper than a
1432  // library call but still not a cheap instruction.
1433  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1434  break;
1435  // FIXME: ctlz, cttz, ...
1436  }
1437 
1438  const TargetLoweringBase *TLI = getTLI();
1439  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1440 
1441  SmallVector<unsigned, 2> LegalCost;
1442  SmallVector<unsigned, 2> CustomCost;
1443  for (unsigned ISD : ISDs) {
1444  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1445  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1446  TLI->isFAbsFree(LT.second)) {
1447  return 0;
1448  }
1449 
1450  // The operation is legal. Assume it costs 1.
1451  // If the type is split to multiple registers, assume that there is some
1452  // overhead to this.
1453  // TODO: Once we have extract/insert subvector cost we need to use them.
1454  if (LT.first > 1)
1455  LegalCost.push_back(LT.first * 2);
1456  else
1457  LegalCost.push_back(LT.first * 1);
1458  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1459  // If the operation is custom lowered then assume
1460  // that the code is twice as expensive.
1461  CustomCost.push_back(LT.first * 2);
1462  }
1463  }
1464 
1465  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1466  if (MinLegalCostI != LegalCost.end())
1467  return *MinLegalCostI;
1468 
1469  auto MinCustomCostI =
1470  std::min_element(CustomCost.begin(), CustomCost.end());
1471  if (MinCustomCostI != CustomCost.end())
1472  return *MinCustomCostI;
1473 
1474  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1475  // point mul followed by an add.
1476  if (IID == Intrinsic::fmuladd)
1477  return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1478  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1479 
1480  // Else, assume that we need to scalarize this intrinsic. For math builtins
1481  // this will emit a costly libcall, adding call overhead and spills. Make it
1482  // very expensive.
1483  if (RetTy->isVectorTy()) {
1484  unsigned ScalarizationCost =
1485  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1486  ? ScalarizationCostPassed
1487  : getScalarizationOverhead(RetTy, true, false));
1488  unsigned ScalarCalls = RetTy->getVectorNumElements();
1489  SmallVector<Type *, 4> ScalarTys;
1490  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1491  Type *Ty = Tys[i];
1492  if (Ty->isVectorTy())
1493  Ty = Ty->getScalarType();
1494  ScalarTys.push_back(Ty);
1495  }
1496  unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1497  IID, RetTy->getScalarType(), ScalarTys, FMF);
1498  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1499  if (Tys[i]->isVectorTy()) {
1500  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1501  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1502  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1503  }
1504  }
1505 
1506  return ScalarCalls * ScalarCost + ScalarizationCost;
1507  }
1508 
1509  // This is going to be turned into a library call, make it expensive.
1510  return SingleCallCost;
1511  }
1512 
1513  /// Compute a cost of the given call instruction.
1514  ///
1515  /// Compute the cost of calling function F with return type RetTy and
1516  /// argument types Tys. F might be nullptr, in this case the cost of an
1517  /// arbitrary call with the specified signature will be returned.
1518  /// This is used, for instance, when we estimate call of a vector
1519  /// counterpart of the given function.
1520  /// \param F Called function, might be nullptr.
1521  /// \param RetTy Return value types.
1522  /// \param Tys Argument types.
1523  /// \returns The cost of Call instruction.
1524  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1525  return 10;
1526  }
1527 
1528  unsigned getNumberOfParts(Type *Tp) {
1529  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1530  return LT.first;
1531  }
1532 
1533  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1534  const SCEV *) {
1535  return 0;
1536  }
1537 
1538  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1539  /// We're assuming that reduction operation are performing the following way:
1540  /// 1. Non-pairwise reduction
1541  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1542  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1543  /// \----------------v-------------/ \----------v------------/
1544  /// n/2 elements n/2 elements
1545  /// %red1 = op <n x t> %val, <n x t> val1
1546  /// After this operation we have a vector %red1 where only the first n/2
1547  /// elements are meaningful, the second n/2 elements are undefined and can be
1548  /// dropped. All other operations are actually working with the vector of
1549  /// length n/2, not n, though the real vector length is still n.
1550  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1551  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1552  /// \----------------v-------------/ \----------v------------/
1553  /// n/4 elements 3*n/4 elements
1554  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1555  /// length n/2, the resulting vector has length n/4 etc.
1556  /// 2. Pairwise reduction:
1557  /// Everything is the same except for an additional shuffle operation which
1558  /// is used to produce operands for pairwise kind of reductions.
1559  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1560  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1561  /// \-------------v----------/ \----------v------------/
1562  /// n/2 elements n/2 elements
1563  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1564  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1565  /// \-------------v----------/ \----------v------------/
1566  /// n/2 elements n/2 elements
1567  /// %red1 = op <n x t> %val1, <n x t> %val2
1568  /// Again, the operation is performed on an <n x t> vector, but the resulting
1569  /// vector %red1 is an <n/2 x t> vector.
1570  ///
1571  /// The cost model should take into account that the actual length of the
1572  /// vector is reduced on each iteration.
1573  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1574  bool IsPairwise) {
1575  assert(Ty->isVectorTy() && "Expect a vector type");
1576  Type *ScalarTy = Ty->getVectorElementType();
1577  unsigned NumVecElts = Ty->getVectorNumElements();
1578  unsigned NumReduxLevels = Log2_32(NumVecElts);
1579  unsigned ArithCost = 0;
1580  unsigned ShuffleCost = 0;
1581  auto *ConcreteTTI = static_cast<T *>(this);
1582  std::pair<unsigned, MVT> LT =
1583  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1584  unsigned LongVectorCount = 0;
1585  unsigned MVTLen =
1586  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1587  while (NumVecElts > MVTLen) {
1588  NumVecElts /= 2;
1589  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1590  // Assume the pairwise shuffles add a cost.
1591  ShuffleCost += (IsPairwise + 1) *
1592  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1593  NumVecElts, SubTy);
1594  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1595  Ty = SubTy;
1596  ++LongVectorCount;
1597  }
1598 
1599  NumReduxLevels -= LongVectorCount;
1600 
1601  // The minimal length of the vector is limited by the real length of vector
1602  // operations performed on the current platform. That's why several final
1603  // reduction operations are performed on the vectors with the same
1604  // architecture-dependent length.
1605 
1606  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1607  // reductions need two shuffles on every level but the last; on that
1608  // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1609  unsigned NumShuffles = NumReduxLevels;
1610  if (IsPairwise && NumReduxLevels >= 1)
1611  NumShuffles += NumReduxLevels - 1;
1612  ShuffleCost += NumShuffles *
1613  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1614  0, Ty);
1615  ArithCost += NumReduxLevels *
1616  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1617  return ShuffleCost + ArithCost +
1618  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1619  }
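Editorial note: to make the cost recurrence above concrete, the sketch below mirrors the split-then-reduce structure for a hypothetical target with unit shuffle and arithmetic costs and a power-of-two element count. It is an illustration of the shape of the computation, not the LLVM API:

// Editorial sketch of getArithmeticReductionCost's structure (not LLVM API).
unsigned reductionCostSketch(unsigned NumVecElts, unsigned LegalLen,
                             bool IsPairwise, unsigned ShufCost,
                             unsigned OpCost, unsigned ExtractCost) {
  unsigned Cost = 0;
  // Halve the vector until it fits in one legal register.
  while (NumVecElts > LegalLen) {
    NumVecElts /= 2;
    Cost += (IsPairwise + 1) * ShufCost + OpCost;
  }
  // Remaining log2 levels happen at the legal width.
  unsigned Levels = 0;
  for (unsigned N = NumVecElts; N > 1; N /= 2)
    ++Levels;
  unsigned Shuffles = Levels + ((IsPairwise && Levels >= 1) ? Levels - 1 : 0);
  Cost += Shuffles * ShufCost + Levels * OpCost;
  return Cost + ExtractCost; // extract lane 0 of the final vector
}
// e.g. an fadd reduction of <8 x float> on a target whose widest legal vector
// holds 4 floats, with unit costs and IsPairwise = false:
// reductionCostSketch(8, 4, false, 1, 1, 1) == 7
// (one split level: shuffle + fadd; two legal-width levels: 2 shuffles +
//  2 fadds; plus the final extractelement).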
1620 
1621  /// Try to calculate op costs for min/max reduction operations.
1622  /// \param CondTy Conditional type for the Select instruction.
1623  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1624  bool) {
1625  assert(Ty->isVectorTy() && "Expect a vector type");
1626  Type *ScalarTy = Ty->getVectorElementType();
1627  Type *ScalarCondTy = CondTy->getVectorElementType();
1628  unsigned NumVecElts = Ty->getVectorNumElements();
1629  unsigned NumReduxLevels = Log2_32(NumVecElts);
1630  unsigned CmpOpcode;
1631  if (Ty->isFPOrFPVectorTy()) {
1632  CmpOpcode = Instruction::FCmp;
1633  } else {
1634  assert(Ty->isIntOrIntVectorTy() &&
1635  "expecting floating point or integer type for min/max reduction");
1636  CmpOpcode = Instruction::ICmp;
1637  }
1638  unsigned MinMaxCost = 0;
1639  unsigned ShuffleCost = 0;
1640  auto *ConcreteTTI = static_cast<T *>(this);
1641  std::pair<unsigned, MVT> LT =
1642  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1643  unsigned LongVectorCount = 0;
1644  unsigned MVTLen =
1645  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1646  while (NumVecElts > MVTLen) {
1647  NumVecElts /= 2;
1648  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1649  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1650 
1651  // Assume the pairwise shuffles add a cost.
1652  ShuffleCost += (IsPairwise + 1) *
1653  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1654  NumVecElts, SubTy);
1655  MinMaxCost +=
1656  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1657  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1658  nullptr);
1659  Ty = SubTy;
1660  ++LongVectorCount;
1661  }
1662 
1663  NumReduxLevels -= LongVectorCount;
1664 
1665  // The minimal length of the vector is limited by the real length of vector
1666  // operations performed on the current platform. That's why several final
1667  // reduction operations are performed on the vectors with the same
1668  // architecture-dependent length.
1669 
1670  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1671  // reductions need two shuffles on every level but the last; on that
1672  // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1673  unsigned NumShuffles = NumReduxLevels;
1674  if (IsPairwise && NumReduxLevels >= 1)
1675  NumShuffles += NumReduxLevels - 1;
1676  ShuffleCost += NumShuffles *
1677  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1678  0, Ty);
1679  MinMaxCost +=
1680  NumReduxLevels *
1681  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1682  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1683  nullptr));
1684  // The last min/max should be in vector registers and we counted it above.
1685  // So we just need a single extractelement.
1686  return ShuffleCost + MinMaxCost +
1687  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1688  }
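Editorial note: the min/max variant has the same shape, with a compare/select pair replacing the single arithmetic op on each level. A small sketch assuming a non-pairwise reduction, an already-legal vector (no splitting loop), and unit costs throughout; it is not the LLVM API:

// Editorial sketch (not LLVM API): min/max reduction cost with no splitting.
unsigned minMaxReductionSketch(unsigned NumVecElts, unsigned ShufCost,
                               unsigned CmpCost, unsigned SelCost,
                               unsigned ExtractCost) {
  unsigned Levels = 0;
  for (unsigned N = NumVecElts; N > 1; N /= 2)
    ++Levels;                      // log2(NumVecElts) reduction levels
  return Levels * (ShufCost + CmpCost + SelCost) + ExtractCost;
}
// e.g. minMaxReductionSketch(4, 1, 1, 1, 1) == 7 for a <4 x i32> smax
// reduction: two levels of shuffle + icmp + select, then one extractelement.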
1689 
1690  unsigned getVectorSplitCost() { return 1; }
1691 
1692  /// @}
1693 };
1694 
1695 /// Concrete BasicTTIImpl that can be used if no further customization
1696 /// is needed.
1697 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1698  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1699 
1700  friend class BasicTTIImplBase<BasicTTIImpl>;
1701 
1702  const TargetSubtargetInfo *ST;
1703  const TargetLoweringBase *TLI;
1704 
1705  const TargetSubtargetInfo *getST() const { return ST; }
1706  const TargetLoweringBase *getTLI() const { return TLI; }
1707 
1708 public:
1709  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1710 };
1711 
1712 } // end namespace llvm
1713 
1714 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
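Editorial note: a backend's TTI implementation typically mirrors the concrete BasicTTIImpl above, subclassing BasicTTIImplBase via CRTP and supplying getST()/getTLI(). The sketch below is illustrative only; the MyTarget* classes and their accessor calls are hypothetical, not LLVM API:

// Hypothetical target TTI built on BasicTTIImplBase; MyTarget* types are
// assumed to exist for illustration only.
class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
  using BaseT = BasicTTIImplBase<MyTargetTTIImpl>;
  friend BaseT;

  const MyTargetSubtarget *ST;   // assumed subtarget class
  const MyTargetLowering *TLI;   // assumed target-lowering class

  // The two hooks BasicTTIImplBase requires from its derived class.
  const MyTargetSubtarget *getST() const { return ST; }
  const MyTargetLowering *getTLI() const { return TLI; }

public:
  explicit MyTargetTTIImpl(const MyTargetTargetMachine *TM, const Function &F)
      : BaseT(TM, F.getParent()->getDataLayout()),
        ST(TM->getSubtargetImpl(F)),     // assumed accessor
        TLI(ST->getTargetLowering()) {}  // assumed accessor

  // Override only the hooks whose generic cost model is wrong for the target,
  // e.g. getArithmeticReductionCost or getMinMaxReductionCost above.
};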