1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides a helper that implements much of the TTI interface in
11 /// terms of the target-independent code generator and TargetLowering
12 /// interfaces.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
18 
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Analysis/TargetTransformInfoImpl.h"
27 #include "llvm/CodeGen/ISDOpcodes.h"
28 #include "llvm/CodeGen/TargetLowering.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallSite.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Operator.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/MC/MCSchedule.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <limits>
54 #include <utility>
55 
56 namespace llvm {
57 
58 class Function;
59 class GlobalValue;
60 class LLVMContext;
61 class ScalarEvolution;
62 class SCEV;
63 class TargetMachine;
64 
65 extern cl::opt<unsigned> PartialUnrollingThreshold;
66 
67 /// Base class which can be used to help build a TTI implementation.
68 ///
69 /// This class provides as much implementation of the TTI interface as is
70 /// possible using the target independent parts of the code generator.
71 ///
72 /// In order to subclass it, your class must implement a getST() method to
73 /// return the subtarget, and a getTLI() method to return the target lowering.
74 /// We need these methods implemented in the derived class so that this class
75 /// doesn't have to duplicate storage for them.
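///
/// A minimal sketch of such a subclass (hypothetical target names, shown for
/// illustration only; constructor and member initialization omitted):
/// \code
///   class MyTTIImpl : public BasicTTIImplBase<MyTTIImpl> {
///     const MySubtargetInfo *ST;
///     const MyTargetLowering *TLI;
///   public:
///     const MySubtargetInfo *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };
/// \endcode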
76 template <typename T>
77 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78 private:
79  using BaseT = TargetTransformInfoImplCRTPBase<T>;
80  using TTI = TargetTransformInfo;
81 
82  /// Estimate a cost of Broadcast as an extract and sequence of insert
83  /// operations.
84  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85  assert(Ty->isVectorTy() && "Can only shuffle vectors");
86  unsigned Cost = 0;
87  // Broadcast cost is equal to the cost of extracting the zero'th element
88  // plus the cost of inserting it into every element of the result vector.
89  Cost += static_cast<T *>(this)->getVectorInstrCost(
90  Instruction::ExtractElement, Ty, 0);
91 
92  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93  Cost += static_cast<T *>(this)->getVectorInstrCost(
94  Instruction::InsertElement, Ty, i);
95  }
96  return Cost;
97  }
98 
99  /// Estimate a cost of shuffle as a sequence of extract and insert
100  /// operations.
101  unsigned getPermuteShuffleOverhead(Type *Ty) {
102  assert(Ty->isVectorTy() && "Can only shuffle vectors");
103  unsigned Cost = 0;
104  // Shuffle cost is equal to the cost of extracting elements from its argument
105  // plus the cost of inserting them into the result vector.
106 
107  // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
108  // index 0 of the first vector, index 1 of the second vector, index 2 of the
109  // first vector, and finally index 3 of the second vector, and insert them at
110  // indices <0,1,2,3> of the result vector.
111  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112  Cost += static_cast<T *>(this)
113  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114  Cost += static_cast<T *>(this)
115  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116  }
117  return Cost;
118  }
119 
120  /// Estimate a cost of subvector extraction as a sequence of extract and
121  /// insert operations.
122  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124  "Can only extract subvectors from vectors");
125  int NumSubElts = SubTy->getVectorNumElements();
126  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127  "SK_ExtractSubvector index out of range");
128 
129  unsigned Cost = 0;
130  // Subvector extraction cost is equal to the cost of extracting elements from
131  // the source type plus the cost of inserting them into the result vector
132  // type.
133  for (int i = 0; i != NumSubElts; ++i) {
134  Cost += static_cast<T *>(this)->getVectorInstrCost(
135  Instruction::ExtractElement, Ty, i + Index);
136  Cost += static_cast<T *>(this)->getVectorInstrCost(
137  Instruction::InsertElement, SubTy, i);
138  }
139  return Cost;
140  }
141 
142  /// Estimate a cost of subvector insertion as a sequence of extract and
143  /// insert operations.
144  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146  "Can only insert subvectors into vectors");
147  int NumSubElts = SubTy->getVectorNumElements();
148  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149  "SK_InsertSubvector index out of range");
150 
151  unsigned Cost = 0;
152  // Subvector insertion cost is equal to the cost of extracting elements from
153  // the subvector type plus the cost of inserting them into the result vector
154  // type.
155  for (int i = 0; i != NumSubElts; ++i) {
156  Cost += static_cast<T *>(this)->getVectorInstrCost(
157  Instruction::ExtractElement, SubTy, i);
158  Cost += static_cast<T *>(this)->getVectorInstrCost(
159  Instruction::InsertElement, Ty, i + Index);
160  }
161  return Cost;
162  }
163 
164  /// Local query method delegates up to T which *must* implement this!
165  const TargetSubtargetInfo *getST() const {
166  return static_cast<const T *>(this)->getST();
167  }
168 
169  /// Local query method delegates up to T which *must* implement this!
170  const TargetLoweringBase *getTLI() const {
171  return static_cast<const T *>(this)->getTLI();
172  }
173 
174  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175  switch (M) {
176  case TTI::MIM_Unindexed:
177  return ISD::UNINDEXED;
178  case TTI::MIM_PreInc:
179  return ISD::PRE_INC;
180  case TTI::MIM_PreDec:
181  return ISD::PRE_DEC;
182  case TTI::MIM_PostInc:
183  return ISD::POST_INC;
184  case TTI::MIM_PostDec:
185  return ISD::POST_DEC;
186  }
187  llvm_unreachable("Unexpected MemIndexedMode");
188  }
189 
190 protected:
191  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192  : BaseT(DL) {}
193 
194  using TargetTransformInfoImplBase::DL;
195 
196 public:
197  /// \name Scalar TTI Implementations
198  /// @{
199  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
200  unsigned BitWidth, unsigned AddressSpace,
201  unsigned Alignment, bool *Fast) const {
202  EVT E = EVT::getIntegerVT(Context, BitWidth);
203  return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
204  }
205 
206  bool hasBranchDivergence() { return false; }
207 
208  bool isSourceOfDivergence(const Value *V) { return false; }
209 
210  bool isAlwaysUniform(const Value *V) { return false; }
211 
212  unsigned getFlatAddressSpace() {
213  // Return an invalid address space.
214  return -1;
215  }
216 
217  bool isLegalAddImmediate(int64_t imm) {
218  return getTLI()->isLegalAddImmediate(imm);
219  }
220 
221  bool isLegalICmpImmediate(int64_t imm) {
222  return getTLI()->isLegalICmpImmediate(imm);
223  }
224 
225  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
226  bool HasBaseReg, int64_t Scale,
227  unsigned AddrSpace, Instruction *I = nullptr) {
228  TargetLoweringBase::AddrMode AM;
229  AM.BaseGV = BaseGV;
230  AM.BaseOffs = BaseOffset;
231  AM.HasBaseReg = HasBaseReg;
232  AM.Scale = Scale;
233  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
234  }
235 
236  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
237  const DataLayout &DL) const {
238  EVT VT = getTLI()->getValueType(DL, Ty);
239  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
240  }
241 
242  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
243  const DataLayout &DL) const {
244  EVT VT = getTLI()->getValueType(DL, Ty);
245  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
246  }
247 
248  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
249  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
250  }
251 
252  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
253  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
254  TargetLoweringBase::AddrMode AM;
255  AM.BaseGV = BaseGV;
256  AM.BaseOffs = BaseOffset;
257  AM.HasBaseReg = HasBaseReg;
258  AM.Scale = Scale;
259  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
260  }
261 
262  bool isTruncateFree(Type *Ty1, Type *Ty2) {
263  return getTLI()->isTruncateFree(Ty1, Ty2);
264  }
265 
266  bool isProfitableToHoist(Instruction *I) {
267  return getTLI()->isProfitableToHoist(I);
268  }
269 
270  bool useAA() const { return getST()->useAA(); }
271 
272  bool isTypeLegal(Type *Ty) {
273  EVT VT = getTLI()->getValueType(DL, Ty);
274  return getTLI()->isTypeLegal(VT);
275  }
276 
277  int getGEPCost(Type *PointeeType, const Value *Ptr,
278  ArrayRef<const Value *> Operands) {
279  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
280  }
281 
282  int getExtCost(const Instruction *I, const Value *Src) {
283  if (getTLI()->isExtFree(I))
284  return TargetTransformInfo::TCC_Free;
285 
286  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
287  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
288  if (getTLI()->isExtLoad(LI, I, DL))
289  return TargetTransformInfo::TCC_Free;
290 
291  return TargetTransformInfo::TCC_Basic;
292  }
293 
294  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
295  ArrayRef<const Value *> Arguments, const User *U) {
296  return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
297  }
298 
299  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
300  ArrayRef<Type *> ParamTys, const User *U) {
301  if (IID == Intrinsic::cttz) {
302  if (getTLI()->isCheapToSpeculateCttz())
303  return TargetTransformInfo::TCC_Basic;
304  return TargetTransformInfo::TCC_Expensive;
305  }
306 
307  if (IID == Intrinsic::ctlz) {
308  if (getTLI()->isCheapToSpeculateCtlz())
309  return TargetTransformInfo::TCC_Basic;
310  return TargetTransformInfo::TCC_Expensive;
311  }
312 
313  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
314  }
315 
316  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
317  unsigned &JumpTableSize) {
318  /// Try to find the estimated number of clusters. Note that the number of
319  /// clusters identified in this function could be different from the actual
320  /// numbers found in lowering. This function ignores switches that are
321  /// lowered with a mix of jump table / bit test / BTree. This function was
322  /// initially intended to be used when estimating the cost of a switch in the
323  /// inline cost heuristic, but it's a generic cost model to be used in other
324  /// places (e.g., in loop unrolling).
325  unsigned N = SI.getNumCases();
326  const TargetLoweringBase *TLI = getTLI();
327  const DataLayout &DL = this->getDataLayout();
328 
329  JumpTableSize = 0;
330  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
331 
332  // Early exit if neither a jump table nor a bit test is allowed.
333  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
334  return N;
335 
336  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
337  APInt MinCaseVal = MaxCaseVal;
338  for (auto CI : SI.cases()) {
339  const APInt &CaseVal = CI.getCaseValue()->getValue();
340  if (CaseVal.sgt(MaxCaseVal))
341  MaxCaseVal = CaseVal;
342  if (CaseVal.slt(MinCaseVal))
343  MinCaseVal = CaseVal;
344  }
345 
346  // Check if suitable for a bit test
347  if (N <= DL.getIndexSizeInBits(0u)) {
348  SmallPtrSet<const BasicBlock *, 4> Dests;
349  for (auto I : SI.cases())
350  Dests.insert(I.getCaseSuccessor());
351 
352  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
353  DL))
354  return 1;
355  }
356 
357  // Check if suitable for a jump table.
358  if (IsJTAllowed) {
359  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
360  return N;
361  uint64_t Range =
362  (MaxCaseVal - MinCaseVal)
363  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
364  // Check whether a range of clusters is dense enough for a jump table
365  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
366  JumpTableSize = Range;
367  return 1;
368  }
369  }
370  return N;
371  }
372 
373  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
374 
375  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
376 
377  bool shouldBuildLookupTables() {
378  const TargetLoweringBase *TLI = getTLI();
379  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
380  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
381  }
382 
383  bool haveFastSqrt(Type *Ty) {
384  const TargetLoweringBase *TLI = getTLI();
385  EVT VT = TLI->getValueType(DL, Ty);
386  return TLI->isTypeLegal(VT) &&
387  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
388  }
389 
390  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
391  return true;
392  }
393 
394  unsigned getFPOpCost(Type *Ty) {
395  // Check whether FADD is available, as a proxy for floating-point in
396  // general.
397  const TargetLoweringBase *TLI = getTLI();
398  EVT VT = TLI->getValueType(DL, Ty);
399  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
400  return TargetTransformInfo::TCC_Basic;
401  return TargetTransformInfo::TCC_Expensive;
402  }
403 
404  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
405  const TargetLoweringBase *TLI = getTLI();
406  switch (Opcode) {
407  default: break;
408  case Instruction::Trunc:
409  if (TLI->isTruncateFree(OpTy, Ty))
410  return TargetTransformInfo::TCC_Free;
411  return TargetTransformInfo::TCC_Basic;
412  case Instruction::ZExt:
413  if (TLI->isZExtFree(OpTy, Ty))
414  return TargetTransformInfo::TCC_Free;
415  return TargetTransformInfo::TCC_Basic;
416  }
417 
418  return BaseT::getOperationCost(Opcode, Ty, OpTy);
419  }
420 
421  unsigned getInliningThresholdMultiplier() { return 1; }
422 
423  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
424  TTI::UnrollingPreferences &UP) {
425  // This unrolling functionality is target independent, but to provide some
426  // motivation for its intended use, for x86:
427 
428  // According to the Intel 64 and IA-32 Architectures Optimization Reference
429  // Manual, Intel Core models and later have a loop stream detector (and
430  // associated uop queue) that can benefit from partial unrolling.
431  // The relevant requirements are:
432  // - The loop must have no more than 4 (8 for Nehalem and later) branches
433  // taken, and none of them may be calls.
434  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
435 
436  // According to the Software Optimization Guide for AMD Family 15h
437  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
438  // and loop buffer which can benefit from partial unrolling.
439  // The relevant requirements are:
440  // - The loop must have fewer than 16 branches
441  // - The loop must have less than 40 uops in all executed loop branches
442 
443  // The number of taken branches in a loop is hard to estimate here, and
444  // benchmarking has revealed that it is better not to be conservative when
445  // estimating the branch count. As a result, we'll ignore the branch limits
446  // until someone finds a case where it matters in practice.
447 
448  unsigned MaxOps;
449  const TargetSubtargetInfo *ST = getST();
450  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
451  MaxOps = PartialUnrollingThreshold;
452  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
453  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
454  else
455  return;
456 
457  // Scan the loop: don't unroll loops with calls.
458  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
459  ++I) {
460  BasicBlock *BB = *I;
461 
462  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
463  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
464  ImmutableCallSite CS(&*J);
465  if (const Function *F = CS.getCalledFunction()) {
466  if (!static_cast<T *>(this)->isLoweredToCall(F))
467  continue;
468  }
469 
470  return;
471  }
472  }
473 
474  // Enable runtime and partial unrolling up to the specified size.
475  // Enable using trip count upper bound to unroll loops.
476  UP.Partial = UP.Runtime = UP.UpperBound = true;
477  UP.PartialThreshold = MaxOps;
478 
479  // Avoid unrolling when optimizing for size.
480  UP.OptSizeThreshold = 0;
481  UP.PartialOptSizeThreshold = 0;
482 
483  // Set number of instructions optimized when "back edge"
484  // becomes "fall through" to default value of 2.
485  UP.BEInsns = 2;
486  }
487 
488  int getInstructionLatency(const Instruction *I) {
489  if (isa<LoadInst>(I))
490  return getST()->getSchedModel().DefaultLoadLatency;
491 
492  return BaseT::getInstructionLatency(I);
493  }
494 
495  /// @}
496 
497  /// \name Vector TTI Implementations
498  /// @{
499 
500  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
501 
502  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
503 
504  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
505  /// are set if the result needs to be inserted and/or extracted from vectors.
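///
/// For example (an illustrative sketch, assuming a unit cost per element
/// operation from getVectorInstrCost): scalarizing a <4 x i32> value with both
/// Insert and Extract set is costed as 4 insertelement plus 4 extractelement
/// operations, i.e. a total cost of 8.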
506  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
507  assert(Ty->isVectorTy() && "Can only scalarize vectors");
508  unsigned Cost = 0;
509 
510  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
511  if (Insert)
512  Cost += static_cast<T *>(this)
513  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
514  if (Extract)
515  Cost += static_cast<T *>(this)
516  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
517  }
518 
519  return Cost;
520  }
521 
522  /// Estimate the overhead of scalarizing an instruction's unique
523  /// non-constant operands. The types of the arguments are ordinarily
524  /// scalar, in which case the costs are multiplied by VF.
525  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
526  unsigned VF) {
527  unsigned Cost = 0;
528  SmallPtrSet<const Value*, 4> UniqueOperands;
529  for (const Value *A : Args) {
530  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
531  Type *VecTy = nullptr;
532  if (A->getType()->isVectorTy()) {
533  VecTy = A->getType();
534  // If A is a vector operand, VF should be 1 or correspond to A.
535  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
536  "Vector argument does not match VF");
537  }
538  else
539  VecTy = VectorType::get(A->getType(), VF);
540 
541  Cost += getScalarizationOverhead(VecTy, false, true);
542  }
543  }
544 
545  return Cost;
546  }
547 
548  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
549  assert(VecTy->isVectorTy());
550 
551  unsigned Cost = 0;
552 
553  Cost += getScalarizationOverhead(VecTy, true, false);
554  if (!Args.empty())
555  Cost += getOperandsScalarizationOverhead(Args,
556  VecTy->getVectorNumElements());
557  else
558  // When no information on arguments is provided, we add the cost
559  // associated with one argument as a heuristic.
560  Cost += getScalarizationOverhead(VecTy, false, true);
561 
562  return Cost;
563  }
564 
565  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
566 
567  unsigned getArithmeticInstrCost(
568  unsigned Opcode, Type *Ty,
569  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
570  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
571  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
572  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
573  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
574  // Check if any of the operands are vector operands.
575  const TargetLoweringBase *TLI = getTLI();
576  int ISD = TLI->InstructionOpcodeToISD(Opcode);
577  assert(ISD && "Invalid opcode");
578 
579  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
580 
581  bool IsFloat = Ty->isFPOrFPVectorTy();
582  // Assume that floating point arithmetic operations cost twice as much as
583  // integer operations.
584  unsigned OpCost = (IsFloat ? 2 : 1);
585 
586  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
587  // The operation is legal. Assume it costs 1.
588  // TODO: Once we have extract/insert subvector cost we need to use them.
589  return LT.first * OpCost;
590  }
591 
592  if (!TLI->isOperationExpand(ISD, LT.second)) {
593  // If the operation is custom lowered, then assume that the code is twice
594  // as expensive.
595  return LT.first * 2 * OpCost;
596  }
597 
598  // Else, assume that we need to scalarize this op.
599  // TODO: If one of the types get legalized by splitting, handle this
600  // similarly to what getCastInstrCost() does.
601  if (Ty->isVectorTy()) {
602  unsigned Num = Ty->getVectorNumElements();
603  unsigned Cost = static_cast<T *>(this)
604  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
605  // Return the cost of multiple scalar invocation plus the cost of
606  // inserting and extracting the values.
607  return getScalarizationOverhead(Ty, Args) + Num * Cost;
608  }
609 
610  // We don't know anything about this scalar instruction.
611  return OpCost;
612  }
613 
614  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
615  Type *SubTp) {
616  switch (Kind) {
617  case TTI::SK_Broadcast:
618  return getBroadcastShuffleOverhead(Tp);
619  case TTI::SK_Select:
620  case TTI::SK_Reverse:
621  case TTI::SK_Transpose:
622  case TTI::SK_PermuteSingleSrc:
623  case TTI::SK_PermuteTwoSrc:
624  return getPermuteShuffleOverhead(Tp);
625  case TTI::SK_ExtractSubvector:
626  return getExtractSubvectorOverhead(Tp, Index, SubTp);
627  case TTI::SK_InsertSubvector:
628  return getInsertSubvectorOverhead(Tp, Index, SubTp);
629  }
630  llvm_unreachable("Unknown TTI::ShuffleKind");
631  }
632 
633  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
634  const Instruction *I = nullptr) {
635  const TargetLoweringBase *TLI = getTLI();
636  int ISD = TLI->InstructionOpcodeToISD(Opcode);
637  assert(ISD && "Invalid opcode");
638  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
639  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
640 
641  // Check for NOOP conversions.
642  if (SrcLT.first == DstLT.first &&
643  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
644 
645  // Bitcast between types that are legalized to the same type are free.
646  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
647  return 0;
648  }
649 
650  if (Opcode == Instruction::Trunc &&
651  TLI->isTruncateFree(SrcLT.second, DstLT.second))
652  return 0;
653 
654  if (Opcode == Instruction::ZExt &&
655  TLI->isZExtFree(SrcLT.second, DstLT.second))
656  return 0;
657 
658  if (Opcode == Instruction::AddrSpaceCast &&
659  TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
660  Dst->getPointerAddressSpace()))
661  return 0;
662 
663  // If this is a zext/sext of a load, return 0 if the corresponding
664  // extending load exists on target.
665  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
666  I && isa<LoadInst>(I->getOperand(0))) {
667  EVT ExtVT = EVT::getEVT(Dst);
668  EVT LoadVT = EVT::getEVT(Src);
669  unsigned LType =
670  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
671  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
672  return 0;
673  }
674 
675  // If the cast is marked as legal (or promote) then assume low cost.
676  if (SrcLT.first == DstLT.first &&
677  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
678  return 1;
679 
680  // Handle scalar conversions.
681  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
682  // Scalar bitcasts are usually free.
683  if (Opcode == Instruction::BitCast)
684  return 0;
685 
686  // Just check the op cost. If the operation is legal then assume it costs
687  // 1.
688  if (!TLI->isOperationExpand(ISD, DstLT.second))
689  return 1;
690 
691  // Assume that illegal scalar instruction are expensive.
692  return 4;
693  }
694 
695  // Check vector-to-vector casts.
696  if (Dst->isVectorTy() && Src->isVectorTy()) {
697  // If the cast is between same-sized registers, then the check is simple.
698  if (SrcLT.first == DstLT.first &&
699  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
700 
701  // Assume that Zext is done using AND.
702  if (Opcode == Instruction::ZExt)
703  return 1;
704 
705  // Assume that sext is done using SHL and SRA.
706  if (Opcode == Instruction::SExt)
707  return 2;
708 
709  // Just check the op cost. If the operation is legal then assume it
710  // costs
711  // 1 and multiply by the type-legalization overhead.
712  if (!TLI->isOperationExpand(ISD, DstLT.second))
713  return SrcLT.first * 1;
714  }
715 
716  // If we are legalizing by splitting, query the concrete TTI for the cost
717  // of casting the original vector twice. We also need to factor in the
718  // cost of the split itself. Count that as 1, to be consistent with
719  // TLI->getTypeLegalizationCost().
720  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
721  TargetLowering::TypeSplitVector) ||
722  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
723  TargetLowering::TypeSplitVector)) {
724  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
725  Dst->getVectorNumElements() / 2);
726  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
727  Src->getVectorNumElements() / 2);
728  T *TTI = static_cast<T *>(this);
729  return TTI->getVectorSplitCost() +
730  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
731  }
732 
733  // In other cases where the source or destination are illegal, assume
734  // the operation will get scalarized.
735  unsigned Num = Dst->getVectorNumElements();
736  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
737  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
738 
739  // Return the cost of multiple scalar invocation plus the cost of
740  // inserting and extracting the values.
741  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
742  }
743 
744  // We already handled vector-to-vector and scalar-to-scalar conversions.
745  // This
746  // is where we handle bitcast between vectors and scalars. We need to assume
747  // that the conversion is scalarized in one way or another.
748  if (Opcode == Instruction::BitCast)
749  // Illegal bitcasts are done by storing and loading from a stack slot.
750  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
751  : 0) +
752  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
753  : 0);
754 
755  llvm_unreachable("Unhandled cast");
756  }
757 
758  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
759  VectorType *VecTy, unsigned Index) {
760  return static_cast<T *>(this)->getVectorInstrCost(
761  Instruction::ExtractElement, VecTy, Index) +
762  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
763  VecTy->getElementType());
764  }
765 
766  unsigned getCFInstrCost(unsigned Opcode) {
767  // Branches are assumed to be predicted.
768  return 0;
769  }
770 
771  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
772  const Instruction *I) {
773  const TargetLoweringBase *TLI = getTLI();
774  int ISD = TLI->InstructionOpcodeToISD(Opcode);
775  assert(ISD && "Invalid opcode");
776 
777  // Selects on vectors are actually vector selects.
778  if (ISD == ISD::SELECT) {
779  assert(CondTy && "CondTy must exist");
780  if (CondTy->isVectorTy())
781  ISD = ISD::VSELECT;
782  }
783  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
784 
785  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
786  !TLI->isOperationExpand(ISD, LT.second)) {
787  // The operation is legal. Assume it costs 1. Multiply
788  // by the type-legalization overhead.
789  return LT.first * 1;
790  }
791 
792  // Otherwise, assume that the cast is scalarized.
793  // TODO: If one of the types get legalized by splitting, handle this
794  // similarly to what getCastInstrCost() does.
795  if (ValTy->isVectorTy()) {
796  unsigned Num = ValTy->getVectorNumElements();
797  if (CondTy)
798  CondTy = CondTy->getScalarType();
799  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
800  Opcode, ValTy->getScalarType(), CondTy, I);
801 
802  // Return the cost of multiple scalar invocation plus the cost of
803  // inserting and extracting the values.
804  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
805  }
806 
807  // Unknown scalar opcode.
808  return 1;
809  }
810 
811  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
812  std::pair<unsigned, MVT> LT =
813  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
814 
815  return LT.first;
816  }
817 
818  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
819  unsigned AddressSpace, const Instruction *I = nullptr) {
820  assert(!Src->isVoidTy() && "Invalid type");
821  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
822 
823  // Assuming that all loads of legal types cost 1.
824  unsigned Cost = LT.first;
825 
826  if (Src->isVectorTy() &&
827  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
828  // This is a vector load that legalizes to a larger type than the vector
829  // itself. Unless the corresponding extending load or truncating store is
830  // legal, then this will scalarize.
831  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
832  EVT MemVT = getTLI()->getValueType(DL, Src);
833  if (Opcode == Instruction::Store)
834  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
835  else
836  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
837 
838  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
839  // This is a vector load/store for some illegal type that is scalarized.
840  // We must account for the cost of building or decomposing the vector.
841  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
842  Opcode == Instruction::Store);
843  }
844  }
845 
846  return Cost;
847  }
848 
849  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
850  unsigned Factor,
851  ArrayRef<unsigned> Indices,
852  unsigned Alignment, unsigned AddressSpace,
853  bool UseMaskForCond = false,
854  bool UseMaskForGaps = false) {
855  VectorType *VT = dyn_cast<VectorType>(VecTy);
856  assert(VT && "Expect a vector type for interleaved memory op");
857 
858  unsigned NumElts = VT->getNumElements();
859  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
860 
861  unsigned NumSubElts = NumElts / Factor;
862  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
863 
864  // First, the cost of the load/store operation.
865  unsigned Cost;
866  if (UseMaskForCond || UseMaskForGaps)
867  Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
868  Opcode, VecTy, Alignment, AddressSpace);
869  else
870  Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
871  AddressSpace);
872 
873  // Legalize the vector type, and get the legalized and unlegalized type
874  // sizes.
875  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
876  unsigned VecTySize =
877  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
878  unsigned VecTyLTSize = VecTyLT.getStoreSize();
879 
880  // Return the ceiling of dividing A by B.
881  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
882 
883  // Scale the cost of the memory operation by the fraction of legalized
884  // instructions that will actually be used. We shouldn't account for the
885  // cost of dead instructions since they will be removed.
886  //
887  // E.g., An interleaved load of factor 8:
888  // %vec = load <16 x i64>, <16 x i64>* %ptr
889  // %v0 = shufflevector %vec, undef, <0, 8>
890  //
891  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
892  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
893  // type). The other loads are unused.
894  //
895  // We only scale the cost of loads since interleaved store groups aren't
896  // allowed to have gaps.
897  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
898  // The number of loads of a legal type it will take to represent a load
899  // of the unlegalized vector type.
900  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
901 
902  // The number of elements of the unlegalized type that correspond to a
903  // single legal instruction.
904  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
905 
906  // Determine which legal instructions will be used.
907  BitVector UsedInsts(NumLegalInsts, false);
908  for (unsigned Index : Indices)
909  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
910  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
911 
912  // Scale the cost of the load by the fraction of legal instructions that
913  // will be used.
914  Cost *= UsedInsts.count() / NumLegalInsts;
915  }
916 
917  // Then add the cost of the interleave operation.
918  if (Opcode == Instruction::Load) {
919  // The interleave cost is similar to extracting the sub vectors' elements
920  // from the wide vector, and inserting them into the sub vectors.
921  //
922  // E.g. An interleaved load of factor 2 (with one member of index 0):
923  // %vec = load <8 x i32>, <8 x i32>* %ptr
924  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
925  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
926  // <8 x i32> vector and insert them into a <4 x i32> vector.
927 
928  assert(Indices.size() <= Factor &&
929  "Interleaved memory op has too many members");
930 
931  for (unsigned Index : Indices) {
932  assert(Index < Factor && "Invalid index for interleaved memory op");
933 
934  // Extract elements from loaded vector for each sub vector.
935  for (unsigned i = 0; i < NumSubElts; i++)
936  Cost += static_cast<T *>(this)->getVectorInstrCost(
937  Instruction::ExtractElement, VT, Index + i * Factor);
938  }
939 
940  unsigned InsSubCost = 0;
941  for (unsigned i = 0; i < NumSubElts; i++)
942  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
943  Instruction::InsertElement, SubVT, i);
944 
945  Cost += Indices.size() * InsSubCost;
946  } else {
947  // The interleave cost is extracting all elements from the sub vectors, and
948  // inserting them into the wide vector.
949  //
950  // E.g. An interleaved store of factor 2:
951  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
952  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
953  // The cost is estimated as extract all elements from both <4 x i32>
954  // vectors and insert into the <8 x i32> vector.
955 
956  unsigned ExtSubCost = 0;
957  for (unsigned i = 0; i < NumSubElts; i++)
958  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
959  Instruction::ExtractElement, SubVT, i);
960  Cost += ExtSubCost * Factor;
961 
962  for (unsigned i = 0; i < NumElts; i++)
963  Cost += static_cast<T *>(this)
964  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
965  }
966 
967  if (!UseMaskForCond)
968  return Cost;
969 
970  Type *I8Type = Type::getInt8Ty(VT->getContext());
971  VectorType *MaskVT = VectorType::get(I8Type, NumElts);
972  SubVT = VectorType::get(I8Type, NumSubElts);
973 
974  // The Mask shuffling cost is extracting all the elements of the Mask
975  // and inserting each of them Factor times into the wide vector:
976  //
977  // E.g. an interleaved group with factor 3:
978  // %mask = icmp ult <8 x i32> %vec1, %vec2
979  // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
980  // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
981  // The cost is estimated as extracting all mask elements from the <8xi1> mask
982  // vector and inserting them Factor times into the <24xi1> shuffled mask
983  // vector.
984  for (unsigned i = 0; i < NumSubElts; i++)
985  Cost += static_cast<T *>(this)->getVectorInstrCost(
986  Instruction::ExtractElement, SubVT, i);
987 
988  for (unsigned i = 0; i < NumElts; i++)
989  Cost += static_cast<T *>(this)->getVectorInstrCost(
990  Instruction::InsertElement, MaskVT, i);
991 
992  // The Gaps mask is invariant and created outside the loop, therefore the
993  // cost of creating it is not accounted for here. However if we have both
994  // a MaskForGaps and some other mask that guards the execution of the
995  // memory access, we need to account for the cost of And-ing the two masks
996  // inside the loop.
997  if (UseMaskForGaps)
998  Cost += static_cast<T *>(this)->getArithmeticInstrCost(
999  BinaryOperator::And, MaskVT);
1000 
1001  return Cost;
1002  }
1003 
1004  /// Get intrinsic cost based on arguments.
1005  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1006  ArrayRef<Value *> Args, FastMathFlags FMF,
1007  unsigned VF = 1) {
1008  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1009  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1010  auto *ConcreteTTI = static_cast<T *>(this);
1011 
1012  switch (IID) {
1013  default: {
1014  // Assume that we need to scalarize this intrinsic.
1015  SmallVector<Type *, 4> Types;
1016  for (Value *Op : Args) {
1017  Type *OpTy = Op->getType();
1018  assert(VF == 1 || !OpTy->isVectorTy());
1019  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1020  }
1021 
1022  if (VF > 1 && !RetTy->isVoidTy())
1023  RetTy = VectorType::get(RetTy, VF);
1024 
1025  // Compute the scalarization overhead based on Args for a vector
1026  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1027  // CostModel will pass a vector RetTy and VF is 1.
1028  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1029  if (RetVF > 1 || VF > 1) {
1030  ScalarizationCost = 0;
1031  if (!RetTy->isVoidTy())
1032  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1033  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1034  }
1035 
1036  return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1037  ScalarizationCost);
1038  }
1039  case Intrinsic::masked_scatter: {
1040  assert(VF == 1 && "Can't vectorize types here.");
1041  Value *Mask = Args[3];
1042  bool VarMask = !isa<Constant>(Mask);
1043  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1044  return ConcreteTTI->getGatherScatterOpCost(
1045  Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1046  }
1047  case Intrinsic::masked_gather: {
1048  assert(VF == 1 && "Can't vectorize types here.");
1049  Value *Mask = Args[2];
1050  bool VarMask = !isa<Constant>(Mask);
1051  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1052  return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1053  Args[0], VarMask, Alignment);
1054  }
1055  case Intrinsic::experimental_vector_reduce_add:
1056  case Intrinsic::experimental_vector_reduce_mul:
1057  case Intrinsic::experimental_vector_reduce_and:
1058  case Intrinsic::experimental_vector_reduce_or:
1059  case Intrinsic::experimental_vector_reduce_xor:
1060  case Intrinsic::experimental_vector_reduce_fadd:
1061  case Intrinsic::experimental_vector_reduce_fmul:
1062  case Intrinsic::experimental_vector_reduce_smax:
1063  case Intrinsic::experimental_vector_reduce_smin:
1064  case Intrinsic::experimental_vector_reduce_fmax:
1065  case Intrinsic::experimental_vector_reduce_fmin:
1066  case Intrinsic::experimental_vector_reduce_umax:
1067  case Intrinsic::experimental_vector_reduce_umin:
1068  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1069  case Intrinsic::fshl:
1070  case Intrinsic::fshr: {
1071  Value *X = Args[0];
1072  Value *Y = Args[1];
1073  Value *Z = Args[2];
1074  TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1075  TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1076  TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1077  TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1078  TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1079  OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1080  : TTI::OP_None;
1081  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1082  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1083  unsigned Cost = 0;
1084  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1085  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1086  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1087  OpKindX, OpKindZ, OpPropsX);
1088  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1089  OpKindY, OpKindZ, OpPropsY);
1090  // Non-constant shift amounts require a modulo.
1091  if (OpKindZ != TTI::OK_UniformConstantValue &&
1092  OpKindZ != TTI::OK_NonUniformConstantValue)
1093  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1094  OpKindZ, OpKindBW, OpPropsZ,
1095  OpPropsBW);
1096  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1097  if (X != Y) {
1098  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1099  if (RetVF > 1)
1100  CondTy = VectorType::get(CondTy, RetVF);
1101  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1102  CondTy, nullptr);
1103  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1104  CondTy, nullptr);
1105  }
1106  return Cost;
1107  }
1108  }
1109  }
1110 
1111  /// Get intrinsic cost based on argument types.
1112  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1113  /// cost of scalarizing the arguments and the return value will be computed
1114  /// based on types.
1115  unsigned getIntrinsicInstrCost(
1116  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1117  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1118  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1119  auto *ConcreteTTI = static_cast<T *>(this);
1120 
1121  SmallVector<unsigned, 2> ISDs;
1122  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1123  switch (IID) {
1124  default: {
1125  // Assume that we need to scalarize this intrinsic.
1126  unsigned ScalarizationCost = ScalarizationCostPassed;
1127  unsigned ScalarCalls = 1;
1128  Type *ScalarRetTy = RetTy;
1129  if (RetTy->isVectorTy()) {
1130  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1131  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1132  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1133  ScalarRetTy = RetTy->getScalarType();
1134  }
1135  SmallVector<Type *, 4> ScalarTys;
1136  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1137  Type *Ty = Tys[i];
1138  if (Ty->isVectorTy()) {
1139  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1140  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1141  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1142  Ty = Ty->getScalarType();
1143  }
1144  ScalarTys.push_back(Ty);
1145  }
1146  if (ScalarCalls == 1)
1147  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1148 
1149  unsigned ScalarCost =
1150  ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1151 
1152  return ScalarCalls * ScalarCost + ScalarizationCost;
1153  }
1154  // Look for intrinsics that can be lowered directly or turned into a scalar
1155  // intrinsic call.
1156  case Intrinsic::sqrt:
1157  ISDs.push_back(ISD::FSQRT);
1158  break;
1159  case Intrinsic::sin:
1160  ISDs.push_back(ISD::FSIN);
1161  break;
1162  case Intrinsic::cos:
1163  ISDs.push_back(ISD::FCOS);
1164  break;
1165  case Intrinsic::exp:
1166  ISDs.push_back(ISD::FEXP);
1167  break;
1168  case Intrinsic::exp2:
1169  ISDs.push_back(ISD::FEXP2);
1170  break;
1171  case Intrinsic::log:
1172  ISDs.push_back(ISD::FLOG);
1173  break;
1174  case Intrinsic::log10:
1175  ISDs.push_back(ISD::FLOG10);
1176  break;
1177  case Intrinsic::log2:
1178  ISDs.push_back(ISD::FLOG2);
1179  break;
1180  case Intrinsic::fabs:
1181  ISDs.push_back(ISD::FABS);
1182  break;
1183  case Intrinsic::canonicalize:
1184  ISDs.push_back(ISD::FCANONICALIZE);
1185  break;
1186  case Intrinsic::minnum:
1187  ISDs.push_back(ISD::FMINNUM);
1188  if (FMF.noNaNs())
1189  ISDs.push_back(ISD::FMINIMUM);
1190  break;
1191  case Intrinsic::maxnum:
1192  ISDs.push_back(ISD::FMAXNUM);
1193  if (FMF.noNaNs())
1194  ISDs.push_back(ISD::FMAXIMUM);
1195  break;
1196  case Intrinsic::copysign:
1197  ISDs.push_back(ISD::FCOPYSIGN);
1198  break;
1199  case Intrinsic::floor:
1200  ISDs.push_back(ISD::FFLOOR);
1201  break;
1202  case Intrinsic::ceil:
1203  ISDs.push_back(ISD::FCEIL);
1204  break;
1205  case Intrinsic::trunc:
1206  ISDs.push_back(ISD::FTRUNC);
1207  break;
1208  case Intrinsic::nearbyint:
1209  ISDs.push_back(ISD::FNEARBYINT);
1210  break;
1211  case Intrinsic::rint:
1212  ISDs.push_back(ISD::FRINT);
1213  break;
1214  case Intrinsic::round:
1215  ISDs.push_back(ISD::FROUND);
1216  break;
1217  case Intrinsic::pow:
1218  ISDs.push_back(ISD::FPOW);
1219  break;
1220  case Intrinsic::fma:
1221  ISDs.push_back(ISD::FMA);
1222  break;
1223  case Intrinsic::fmuladd:
1224  ISDs.push_back(ISD::FMA);
1225  break;
1226  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1227  case Intrinsic::lifetime_start:
1228  case Intrinsic::lifetime_end:
1229  case Intrinsic::sideeffect:
1230  return 0;
1231  case Intrinsic::masked_store:
1232  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1233  0);
1234  case Intrinsic::masked_load:
1235  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1236  case Intrinsic::experimental_vector_reduce_add:
1237  return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1238  /*IsPairwiseForm=*/false);
1239  case Intrinsic::experimental_vector_reduce_mul:
1240  return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1241  /*IsPairwiseForm=*/false);
1242  case Intrinsic::experimental_vector_reduce_and:
1243  return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1244  /*IsPairwiseForm=*/false);
1245  case Intrinsic::experimental_vector_reduce_or:
1246  return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1247  /*IsPairwiseForm=*/false);
1248  case Intrinsic::experimental_vector_reduce_xor:
1249  return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1250  /*IsPairwiseForm=*/false);
1251  case Intrinsic::experimental_vector_reduce_fadd:
1252  return ConcreteTTI->getArithmeticReductionCost(Instruction::FAdd, Tys[0],
1253  /*IsPairwiseForm=*/false);
1254  case Intrinsic::experimental_vector_reduce_fmul:
1255  return ConcreteTTI->getArithmeticReductionCost(Instruction::FMul, Tys[0],
1256  /*IsPairwiseForm=*/false);
1257  case Intrinsic::experimental_vector_reduce_smax:
1258  case Intrinsic::experimental_vector_reduce_smin:
1259  case Intrinsic::experimental_vector_reduce_fmax:
1260  case Intrinsic::experimental_vector_reduce_fmin:
1261  return ConcreteTTI->getMinMaxReductionCost(
1262  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1263  /*IsSigned=*/true);
1264  case Intrinsic::experimental_vector_reduce_umax:
1265  case Intrinsic::experimental_vector_reduce_umin:
1266  return ConcreteTTI->getMinMaxReductionCost(
1267  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1268  /*IsSigned=*/false);
1269  case Intrinsic::sadd_sat:
1270  case Intrinsic::ssub_sat: {
1271  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1272  if (RetVF > 1)
1273  CondTy = VectorType::get(CondTy, RetVF);
1274 
1275  Type *OpTy = StructType::create({RetTy, CondTy});
1276  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1277  ? Intrinsic::sadd_with_overflow
1278  : Intrinsic::ssub_with_overflow;
1279 
1280  // SatMax -> Overflow && SumDiff < 0
1281  // SatMin -> Overflow && SumDiff >= 0
1282  unsigned Cost = 0;
1283  Cost += ConcreteTTI->getIntrinsicInstrCost(
1284  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1285  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1286  CondTy, nullptr);
1287  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1288  CondTy, nullptr);
1289  return Cost;
1290  }
1291  case Intrinsic::uadd_sat:
1292  case Intrinsic::usub_sat: {
1293  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1294  if (RetVF > 1)
1295  CondTy = VectorType::get(CondTy, RetVF);
1296 
1297  Type *OpTy = StructType::create({RetTy, CondTy});
1298  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1299  ? Intrinsic::uadd_with_overflow
1300  : Intrinsic::usub_with_overflow;
1301 
1302  unsigned Cost = 0;
1303  Cost += ConcreteTTI->getIntrinsicInstrCost(
1304  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1306  CondTy, nullptr);
1307  return Cost;
1308  }
1309  case Intrinsic::smul_fix:
1310  case Intrinsic::umul_fix: {
1311  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1312  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1313  if (RetVF > 1)
1314  ExtTy = VectorType::get(ExtTy, RetVF);
1315 
1316  unsigned ExtOp =
1317  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1318 
1319  unsigned Cost = 0;
1320  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1321  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1322  Cost +=
1323  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1324  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1325  TTI::OK_AnyValue,
1326  TTI::OK_UniformConstantValue);
1327  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1328  TTI::OK_AnyValue,
1329  TTI::OK_UniformConstantValue);
1330  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1331  return Cost;
1332  }
1333  case Intrinsic::sadd_with_overflow:
1334  case Intrinsic::ssub_with_overflow: {
1335  Type *SumTy = RetTy->getContainedType(0);
1336  Type *OverflowTy = RetTy->getContainedType(1);
1337  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1338  ? BinaryOperator::Add
1339  : BinaryOperator::Sub;
1340 
1341  // LHSSign -> LHS >= 0
1342  // RHSSign -> RHS >= 0
1343  // SumSign -> Sum >= 0
1344  //
1345  // Add:
1346  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1347  // Sub:
1348  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1349  unsigned Cost = 0;
1350  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1351  Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1352  OverflowTy, nullptr);
1353  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1354  BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1355  Cost +=
1356  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1357  return Cost;
1358  }
1359  case Intrinsic::uadd_with_overflow:
1360  case Intrinsic::usub_with_overflow: {
1361  Type *SumTy = RetTy->getContainedType(0);
1362  Type *OverflowTy = RetTy->getContainedType(1);
1363  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1364  ? BinaryOperator::Add
1365  : BinaryOperator::Sub;
1366 
1367  unsigned Cost = 0;
1368  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1369  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1370  OverflowTy, nullptr);
1371  return Cost;
1372  }
1373  case Intrinsic::smul_with_overflow:
1374  case Intrinsic::umul_with_overflow: {
1375  Type *MulTy = RetTy->getContainedType(0);
1376  Type *OverflowTy = RetTy->getContainedType(1);
1377  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1378  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1379  if (MulTy->isVectorTy())
1380  ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
1381 
1382  unsigned ExtOp =
1383  IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
1384 
1385  unsigned Cost = 0;
1386  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1387  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1388  Cost +=
1389  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1390  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1391  TTI::OK_AnyValue,
1392  TTI::OK_UniformConstantValue);
1393 
1394  if (IID == Intrinsic::smul_with_overflow)
1395  Cost += ConcreteTTI->getArithmeticInstrCost(
1396  Instruction::AShr, MulTy, TTI::OK_AnyValue,
1397  TTI::OK_UniformConstantValue);
1398 
1399  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1400  OverflowTy, nullptr);
1401  return Cost;
1402  }
1403  case Intrinsic::ctpop:
1404  ISDs.push_back(ISD::CTPOP);
1405  // In case of legalization use TCC_Expensive. This is cheaper than a
1406  // library call but still not a cheap instruction.
1407  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1408  break;
1409  // FIXME: ctlz, cttz, ...
1410  }
1411 
1412  const TargetLoweringBase *TLI = getTLI();
1413  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1414 
1415  SmallVector<unsigned, 2> LegalCost;
1416  SmallVector<unsigned, 2> CustomCost;
1417  for (unsigned ISD : ISDs) {
1418  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1419  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1420  TLI->isFAbsFree(LT.second)) {
1421  return 0;
1422  }
1423 
1424  // The operation is legal. Assume it costs 1.
1425  // If the type is split to multiple registers, assume that there is some
1426  // overhead to this.
1427  // TODO: Once we have extract/insert subvector cost we need to use them.
1428  if (LT.first > 1)
1429  LegalCost.push_back(LT.first * 2);
1430  else
1431  LegalCost.push_back(LT.first * 1);
1432  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1433  // If the operation is custom lowered then assume
1434  // that the code is twice as expensive.
1435  CustomCost.push_back(LT.first * 2);
1436  }
1437  }
1438 
1439  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1440  if (MinLegalCostI != LegalCost.end())
1441  return *MinLegalCostI;
1442 
1443  auto MinCustomCostI =
1444  std::min_element(CustomCost.begin(), CustomCost.end());
1445  if (MinCustomCostI != CustomCost.end())
1446  return *MinCustomCostI;
1447 
1448  // If we can't lower fmuladd into an FMA, estimate the cost as a floating-
1449  // point mul followed by an add.
1450  if (IID == Intrinsic::fmuladd)
1451  return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1452  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1453 
1454  // Else, assume that we need to scalarize this intrinsic. For math builtins
1455  // this will emit a costly libcall, adding call overhead and spills. Make it
1456  // very expensive.
1457  if (RetTy->isVectorTy()) {
1458  unsigned ScalarizationCost =
1459  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1460  ? ScalarizationCostPassed
1461  : getScalarizationOverhead(RetTy, true, false));
1462  unsigned ScalarCalls = RetTy->getVectorNumElements();
1463  SmallVector<Type *, 4> ScalarTys;
1464  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1465  Type *Ty = Tys[i];
1466  if (Ty->isVectorTy())
1467  Ty = Ty->getScalarType();
1468  ScalarTys.push_back(Ty);
1469  }
1470  unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1471  IID, RetTy->getScalarType(), ScalarTys, FMF);
1472  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1473  if (Tys[i]->isVectorTy()) {
1474  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1475  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1476  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1477  }
1478  }
1479 
1480  return ScalarCalls * ScalarCost + ScalarizationCost;
1481  }
1482 
1483  // This is going to be turned into a library call, make it expensive.
1484  return SingleCallCost;
1485  }
1486 
1487  /// Compute a cost of the given call instruction.
1488  ///
1489  /// Compute the cost of calling function F with return type RetTy and
1490  /// argument types Tys. F might be nullptr, in this case the cost of an
1491  /// arbitrary call with the specified signature will be returned.
1492  /// This is used, for instance, when we estimate call of a vector
1493  /// counterpart of the given function.
1494  /// \param F Called function, might be nullptr.
1495  /// \param RetTy Return value types.
1496  /// \param Tys Argument types.
1497  /// \returns The cost of Call instruction.
1498  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1499  return 10;
1500  }
1501 
1502  unsigned getNumberOfParts(Type *Tp) {
1503  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1504  return LT.first;
1505  }
1506 
1507  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1508  const SCEV *) {
1509  return 0;
1510  }
1511 
1512  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1513  /// We're assuming that reduction operations are performed the following way:
1514  /// 1. Non-pairwise reduction
1515  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1516  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1517  /// \----------------v-------------/ \----------v------------/
1518  /// n/2 elements n/2 elements
1519  /// %red1 = op <n x t> %val, <n x t> val1
1520  /// After this operation we have a vector %red1 where only the first n/2
1521  /// elements are meaningful, the second n/2 elements are undefined and can be
1522  /// dropped. All other operations are actually working with the vector of
1523  /// length n/2, not n, though the real vector length is still n.
1524  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1525  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1526  /// \----------------v-------------/ \----------v------------/
1527  /// n/4 elements 3*n/4 elements
1528  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1529  /// length n/2, the resulting vector has length n/4 etc.
1530  /// 2. Pairwise reduction:
1531  /// Everything is the same except for an additional shuffle operation which
1532  /// is used to produce operands for pairwise kind of reductions.
1533  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1534  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1535  /// \-------------v----------/ \----------v------------/
1536  /// n/2 elements n/2 elements
1537  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1538  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1539  /// \-------------v----------/ \----------v------------/
1540  /// n/2 elements n/2 elements
1541  /// %red1 = op <n x t> %val1, <n x t> val2
1542  /// Again, the operation is performed on <n x t> vector, but the resulting
1543  /// vector %red1 is <n/2 x t> vector.
1544  ///
1545  /// The cost model should take into account that the actual length of the
1546  /// vector is reduced on each iteration.
1547  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1548  bool IsPairwise) {
1549  assert(Ty->isVectorTy() && "Expect a vector type");
1550  Type *ScalarTy = Ty->getVectorElementType();
1551  unsigned NumVecElts = Ty->getVectorNumElements();
1552  unsigned NumReduxLevels = Log2_32(NumVecElts);
1553  unsigned ArithCost = 0;
1554  unsigned ShuffleCost = 0;
1555  auto *ConcreteTTI = static_cast<T *>(this);
1556  std::pair<unsigned, MVT> LT =
1557  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1558  unsigned LongVectorCount = 0;
1559  unsigned MVTLen =
1560  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1561  while (NumVecElts > MVTLen) {
1562  NumVecElts /= 2;
1563  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1564  // Assume the pairwise shuffles add a cost.
1565  ShuffleCost += (IsPairwise + 1) *
1566  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1567  NumVecElts, SubTy);
1568  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1569  Ty = SubTy;
1570  ++LongVectorCount;
1571  }
1572 
1573  NumReduxLevels -= LongVectorCount;
1574 
1575  // The minimal length of the vector is limited by the real length of vector
1576  // operations performed on the current platform. That's why several final
1577  // reduction operations are performed on the vectors with the same
1578  // architecture-dependent length.
1579 
1580  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1581  // reductions need two shuffles on every level but the last one; on that
1582  // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1583  unsigned NumShuffles = NumReduxLevels;
1584  if (IsPairwise && NumReduxLevels >= 1)
1585  NumShuffles += NumReduxLevels - 1;
1586  ShuffleCost += NumShuffles *
1587  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1588  0, Ty);
1589  ArithCost += NumReduxLevels *
1590  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1591  return ShuffleCost + ArithCost +
1592  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1593  }
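As a hedged worked example of the model above (all costs are target-dependent placeholders): reducing <8 x float> with fadd, non-pairwise, on a target whose widest legal vector is <4 x float> gives Log2_32(8) = 3 levels. The first split (8 to 4 elements) is charged as an extract-subvector shuffle plus an fadd on <4 x float>; the remaining two levels are each charged as a single-source permute plus an fadd on the legal type; a final extractelement retrieves the scalar result. Querying this would look like the following, with `TTI` and `Ctx` assumed:

  // Hedged sketch: the returned number is the sum of shuffle, arithmetic, and
  // final extract costs described in the comment above.
  Type *V8F32 = VectorType::get(Type::getFloatTy(Ctx), 8);
  unsigned RedCost = TTI.getArithmeticReductionCost(Instruction::FAdd, V8F32,
                                                    /*IsPairwise=*/false);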
1594 
1595  /// Try to calculate op costs for min/max reduction operations.
1596  /// \param CondTy Conditional type for the Select instruction.
1597  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1598  bool) {
1599  assert(Ty->isVectorTy() && "Expect a vector type");
1600  Type *ScalarTy = Ty->getVectorElementType();
1601  Type *ScalarCondTy = CondTy->getVectorElementType();
1602  unsigned NumVecElts = Ty->getVectorNumElements();
1603  unsigned NumReduxLevels = Log2_32(NumVecElts);
1604  unsigned CmpOpcode;
1605  if (Ty->isFPOrFPVectorTy()) {
1606  CmpOpcode = Instruction::FCmp;
1607  } else {
1608  assert(Ty->isIntOrIntVectorTy() &&
1609  "expecting floating point or integer type for min/max reduction");
1610  CmpOpcode = Instruction::ICmp;
1611  }
1612  unsigned MinMaxCost = 0;
1613  unsigned ShuffleCost = 0;
1614  auto *ConcreteTTI = static_cast<T *>(this);
1615  std::pair<unsigned, MVT> LT =
1616  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1617  unsigned LongVectorCount = 0;
1618  unsigned MVTLen =
1619  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1620  while (NumVecElts > MVTLen) {
1621  NumVecElts /= 2;
1622  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1623  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1624 
1625  // Assume the pairwise shuffles add a cost.
1626  ShuffleCost += (IsPairwise + 1) *
1627  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1628  NumVecElts, SubTy);
1629  MinMaxCost +=
1630  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1631  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1632  nullptr);
1633  Ty = SubTy;
1634  ++LongVectorCount;
1635  }
1636 
1637  NumReduxLevels -= LongVectorCount;
1638 
1639  // The minimal length of the vector is limited by the real length of vector
1640  // operations performed on the current platform. That's why several final
1641  // reduction operations are performed on the vectors with the same
1642  // architecture-dependent length.
1643 
1644  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1645  // reductions need two shuffles on every level but the last one; on that
1646  // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1647  unsigned NumShuffles = NumReduxLevels;
1648  if (IsPairwise && NumReduxLevels >= 1)
1649  NumShuffles += NumReduxLevels - 1;
1650  ShuffleCost += NumShuffles *
1651  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1652  0, Ty);
1653  MinMaxCost +=
1654  NumReduxLevels *
1655  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1656  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1657  nullptr));
1658  // The last min/max should be in vector registers and we counted it above.
1659  // So we just need a single extractelement.
1660  return ShuffleCost + MinMaxCost +
1661  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1662  }
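The min/max variant is queried the same way, with a vector-of-i1 condition type for the selects. A hedged sketch, again assuming `TTI` and `Ctx`:

  // Hedged sketch: cost of a signed integer max reduction over <8 x i32>.
  // Each level is modeled as an icmp plus a select on the narrowed vector type.
  Type *V8I32 = VectorType::get(Type::getInt32Ty(Ctx), 8);
  Type *V8I1 = VectorType::get(Type::getInt1Ty(Ctx), 8);
  unsigned MinMaxCost = TTI.getMinMaxReductionCost(V8I32, V8I1,
                                                   /*IsPairwiseForm=*/false,
                                                   /*IsUnsigned=*/false);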
1663 
1664  unsigned getVectorSplitCost() { return 1; }
1665 
1666  /// @}
1667 };
1668 
1669 /// Concrete BasicTTIImpl that can be used if no further customization
1670 /// is needed.
1671 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1672  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1673 
1674  friend class BasicTTIImplBase<BasicTTIImpl>;
1675 
1676  const TargetSubtargetInfo *ST;
1677  const TargetLoweringBase *TLI;
1678 
1679  const TargetSubtargetInfo *getST() const { return ST; }
1680  const TargetLoweringBase *getTLI() const { return TLI; }
1681 
1682 public:
1683  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1684 };
1685 
1686 } // end namespace llvm
1687 
1688 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
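For context, a target that wants the generic cost model but with its own subtarget and lowering information typically follows the same CRTP pattern as the concrete class above. The following is a hypothetical sketch: MyTargetTTIImpl and its constructor body are illustrative, not part of this header, though real targets (for example X86TTIImpl) are structured similarly with their target-specific subtarget and TargetLowering classes.

  // Hypothetical target hook-up; BasicTTIImplBase reaches getST()/getTLI()
  // through the CRTP derived type, hence the friend declaration.
  class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
    using BaseT = BasicTTIImplBase<MyTargetTTIImpl>;
    friend BaseT;

    const TargetSubtargetInfo *ST;
    const TargetLoweringBase *TLI;

    const TargetSubtargetInfo *getST() const { return ST; }
    const TargetLoweringBase *getTLI() const { return TLI; }

  public:
    MyTargetTTIImpl(const TargetMachine *TM, const Function &F)
        : BaseT(TM, F.getParent()->getDataLayout()),
          ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
  };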