//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>

namespace llvm {

class Function;
class GlobalValue;
class LLVMContext;
class ScalarEvolution;
class SCEV;
class TargetMachine;

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
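///
/// For example, a target would typically wire this up as follows. The names
/// MyTTIImpl, MySubtarget, MyTargetLowering and MyTargetMachine below are
/// hypothetical, shown only to illustrate the pattern used by the in-tree
/// targets:
///
/// \code
/// class MyTTIImpl : public BasicTTIImplBase<MyTTIImpl> {
///   using BaseT = BasicTTIImplBase<MyTTIImpl>;
///   friend BaseT;
///
///   const MySubtarget *ST;
///   const MyTargetLowering *TLI;
///
///   // Required by BasicTTIImplBase: expose the stored subtarget and target
///   // lowering so the base class doesn't need to duplicate this storage.
///   const MySubtarget *getST() const { return ST; }
///   const MyTargetLowering *getTLI() const { return TLI; }
///
/// public:
///   explicit MyTTIImpl(const MyTargetMachine *TM, const Function &F)
///       : BaseT(TM, F.getParent()->getDataLayout()),
///         ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
/// };
/// \endcode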
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
  using TTI = TargetTransformInfo;

  /// Helper function to access this as a T.
  T *thisT() { return static_cast<T *>(this); }

  /// Estimate a cost of Broadcast as an extract and sequence of insert
  /// operations.
  InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) {
    InstructionCost Cost = 0;
    // Broadcast cost is equal to the cost of extracting the zeroth element
    // plus the cost of inserting it into every element of the result vector.
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);

    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of shuffle as a sequence of extract and insert
  /// operations.
  InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy) {
    InstructionCost Cost = 0;
    // Shuffle cost is equal to the cost of extracting elements from the
    // argument vectors plus the cost of inserting them into the result
    // vector.

    // E.g. a <4 x float> shuffle with mask <0,5,2,7> needs to extract from
    // index 0 of the first vector, index 1 of the second vector, index 2 of
    // the first vector and finally index 3 of the second vector, and insert
    // the results at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector extraction as a sequence of extract and
  /// insert operations.
  InstructionCost getExtractSubvectorOverhead(VectorType *VTy, int Index,
                                              FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only extract subvectors from vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_ExtractSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector extraction cost is equal to the cost of extracting elements
    // from the source type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                          i + Index);
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector insertion as a sequence of extract and
  /// insert operations.
  InstructionCost getInsertSubvectorOverhead(VectorType *VTy, int Index,
                                             FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only insert subvectors into vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_InsertSubvector index out of range");

    InstructionCost Cost = 0;
    // Subvector insertion cost is equal to the cost of extracting elements
    // from the source type plus the cost of inserting them into the result
    // vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                          i + Index);
    }
    return Cost;
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }

  InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                              Align Alignment,
                                              bool VariableMask,
                                              bool IsGatherScatter,
                                              TTI::TargetCostKind CostKind) {
    auto *VT = cast<FixedVectorType>(DataTy);
    // Assume the target does not have support for gather/scatter operations
    // and provide a rough estimate.
    //
    // First, compute the cost of the individual memory operations.
    InstructionCost AddrExtractCost =
        IsGatherScatter
            ? getVectorInstrCost(Instruction::ExtractElement,
                                 FixedVectorType::get(
                                     PointerType::get(VT->getElementType(), 0),
                                     VT->getNumElements()),
                                 -1)
            : 0;
    InstructionCost LoadCost =
        VT->getNumElements() *
        (AddrExtractCost +
         getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));

    // Next, compute the cost of packing the result in a vector.
    InstructionCost PackingCost = getScalarizationOverhead(
        VT, Opcode != Instruction::Store, Opcode == Instruction::Store);

    InstructionCost ConditionalCost = 0;
    if (VariableMask) {
      // Compute the cost of conditionally executing the memory operations with
      // variable masks. This includes extracting the individual conditions,
      // plus the branches and PHIs to combine the results.
      // NOTE: Estimating the cost of conditionally executing the memory
      // operations accurately is quite difficult and the current solution
      // provides a very rough estimate only.
      ConditionalCost =
          VT->getNumElements() *
          (getVectorInstrCost(
               Instruction::ExtractElement,
               FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
                                    VT->getNumElements()),
               -1) +
           getCFInstrCost(Instruction::Br, CostKind) +
           getCFInstrCost(Instruction::PHI, CostKind));
    }

    return LoadCost + PackingCost + ConditionalCost;
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
  virtual ~BasicTTIImplBase() = default;

  using TargetTransformInfoImplBase::DL;

public:
  /// \name Scalar TTI Implementations
  /// @{
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, Align Alignment,
                                      bool *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }

  bool hasBranchDivergence() { return false; }

  bool useGPUDivergenceAnalysis() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
  }

  std::pair<const Value *, unsigned>
  getPredicatedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getPredicatedAddrSpace(V);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
                             Type *ScalarValTy) const {
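    // Starting from VF, keep halving as long as the target could still handle
    // a store of VF / 2 elements, either directly (legal or custom lowering)
    // or as a legal truncating store of the legalized value type.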
    auto &&IsSupportedByTarget = [this, ScalarMemTy, ScalarValTy](unsigned VF) {
      auto *SrcTy = FixedVectorType::get(ScalarMemTy, VF / 2);
      EVT VT = getTLI()->getValueType(DL, SrcTy);
      if (getTLI()->isOperationLegal(ISD::STORE, VT) ||
          getTLI()->isOperationCustom(ISD::STORE, VT))
        return true;

      EVT ValVT =
          getTLI()->getValueType(DL, FixedVectorType::get(ScalarValTy, VF / 2));
      EVT LegalizedVT =
          getTLI()->getTypeToTransformTo(ScalarMemTy->getContext(), VT);
      return getTLI()->isTruncStoreLegal(LegalizedVT, ValVT);
    };
    while (VF > 2 && IsSupportedByTarget(VF))
      VF /= 2;
    return VF;
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  bool isNumRegsMajorCostOfLSR() {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }

  bool isProfitableLSRChainElement(Instruction *I) {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }

  InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                       int64_t BaseOffset, bool HasBaseReg,
                                       int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  InstructionCost getRegUsageForType(Type *Ty) {
    InstructionCost Val = getTLI()->getTypeLegalizationCost(DL, Ty).first;
    assert(Val >= 0 && "Negative cost!");
    return Val;
  }

  InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
                             ArrayRef<const Value *> Operands,
                             TTI::TargetCostKind CostKind) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands, CostKind);
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// numbers found in lowering. This function ignores switches that are
    /// lowered with a mix of jump table / bit test / BTree. This function was
    /// initially intended to be used when estimating the cost of switch in
    /// inline cost heuristic, but it's a generic cost model to be used in other
    /// places (e.g., in loop unrolling).
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if both a jump table and bit test are not allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether a range of clusters is dense enough for a jump table
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool shouldBuildRelLookupTables() const {
    const TargetMachine &TM = getTLI()->getTargetMachine();
    // If non-PIC mode, do not generate a relative lookup table.
    if (!TM.isPositionIndependent())
      return false;

    /// Relative lookup table entries consist of 32-bit offsets.
    /// Do not generate relative lookup tables for large code models
    /// in 64-bit architectures where 32-bit offsets might not be enough.
    if (TM.getCodeModel() == CodeModel::Medium ||
        TM.getCodeModel() == CodeModel::Large)
      return false;

    Triple TargetTriple = TM.getTargetTriple();
    if (!TargetTriple.isArch64Bit())
      return false;

    // TODO: Triggers issues on aarch64 on darwin, so temporarily disable it
    // there.
    if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin())
      return false;

    return true;
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }

  InstructionCost getFPOpCost(Type *Ty) {
    // Check whether FADD is available, as a proxy for floating-point in
    // general.
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
      return TargetTransformInfo::TCC_Basic;
    return TargetTransformInfo::TCC_Expensive;
  }

  unsigned getInliningThresholdMultiplier() { return 1; }
  unsigned adjustInliningThreshold(const CallBase *CB) { return 0; }

  int getInlinerVectorBonusPercent() { return 150; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP,
                               OptimizationRemarkEmitter *ORE) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (BasicBlock *BB : L->blocks()) {
      for (Instruction &I : *BB) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
          if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
            if (!thisT()->isLoweredToCall(F))
              continue;
          }

          if (ORE) {
            ORE->emit([&]() {
              return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(),
                                        L->getHeader())
                     << "advising against unrolling the loop because it "
                        "contains a "
                     << ore::NV("Call", &I);
            });
          }
          return;
        }
      }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using trip count upper bound to unroll loops.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set number of instructions optimized when "back edge"
    // becomes "fall through" to default value of 2.
    UP.BEInsns = 2;
  }

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) {
    PP.PeelCount = 0;
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) {
    return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }

  PredicationStyle emitGetActiveLaneMask() {
    return BaseT::emitGetActiveLaneMask();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

  Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedMask,
                                                     KnownBits &Known,
                                                     bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) {
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }

  InstructionCost getInstructionLatency(const Instruction *I) {
    if (isa<LoadInst>(I))
      return getST()->getSchedModel().DefaultLoadLatency;

    return BaseT::getInstructionLatency(I);
  }

  virtual Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return Optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    Optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
      return TargetResult;

    return BaseT::getCacheAssociativity(Level);
  }

  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }

  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }

  virtual bool enableWritePrefetching() const {
    return getST()->enableWritePrefetching();
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
    return TypeSize::getFixed(32);
  }

  Optional<unsigned> getMaxVScale() const { return None; }
  Optional<unsigned> getVScaleForTuning() const { return None; }

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
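  ///
  /// For example, for a <4 x i32> where only lanes 0 and 2 are demanded and
  /// Insert is set, this adds up the cost of two insertelement operations
  /// (one for lane 0 and one for lane 2).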
  InstructionCost getScalarizationOverhead(VectorType *InTy,
                                           const APInt &DemandedElts,
                                           bool Insert, bool Extract) {
    /// FIXME: a bitfield is not a reasonable abstraction for talking about
    /// which elements are needed from a scalable vector
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           "Vector size mismatch");

    InstructionCost Cost = 0;

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }

  /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
  InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert,
                                           bool Extract) {
    auto *Ty = cast<FixedVectorType>(InTy);

    APInt DemandedElts = APInt::getAllOnes(Ty->getNumElements());
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  }

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The (potentially vector) types to use for each
  /// argument are passed via Tys.
  InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                                   ArrayRef<Type *> Tys) {
    assert(Args.size() == Tys.size() && "Expected matching Args and Tys");

    InstructionCost Cost = 0;
    SmallPtrSet<const Value*, 4> UniqueOperands;
    for (int I = 0, E = Args.size(); I != E; I++) {
      // Disregard things like metadata arguments.
      const Value *A = Args[I];
      Type *Ty = Tys[I];
      if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
          !Ty->isPtrOrPtrVectorTy())
        continue;

      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        if (auto *VecTy = dyn_cast<VectorType>(Ty))
          Cost += getScalarizationOverhead(VecTy, false, true);
      }
    }

    return Cost;
  }

  /// Estimate the overhead of scalarizing the inputs and outputs of an
  /// instruction, with return type RetTy and arguments Args of type Tys. If
  /// Args are unknown (empty), then the cost associated with one argument is
  /// added as a heuristic.
  InstructionCost getScalarizationOverhead(VectorType *RetTy,
                                           ArrayRef<const Value *> Args,
                                           ArrayRef<Type *> Tys) {
    InstructionCost Cost = getScalarizationOverhead(RetTy, true, false);
    if (!Args.empty())
      Cost += getOperandsScalarizationOverhead(Args, Tys);
    else
      // When no information on arguments is provided, we add the cost
      // associated with one argument as a heuristic.
      Cost += getScalarizationOverhead(RetTy, false, true);

    return Cost;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  InstructionCost getArithmeticInstrCost(
      unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle more cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                           Opd1Info, Opd2Info,
                                           Opd1PropInfo, Opd2PropInfo,
                                           Args, CxtI);

    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    InstructionCost OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is twice
      // as expensive.
      return LT.first * 2 * OpCost;
    }

    // An 'Expand' of URem and SRem is special because it may default
    // to expanding the operation into a sequence of sub-operations
    // i.e. X % Y -> X-(X/Y)*Y.
    if (ISD == ISD::UREM || ISD == ISD::SREM) {
      bool IsSigned = ISD == ISD::SREM;
      if (TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM,
                                        LT.second) ||
          TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV,
                                        LT.second)) {
        unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv;
        InstructionCost DivCost = thisT()->getArithmeticInstrCost(
            DivOpc, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo,
            Opd2PropInfo);
        InstructionCost MulCost =
            thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind);
        InstructionCost SubCost =
            thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind);
        return DivCost + MulCost + SubCost;
      }
    }

    // We cannot scalarize scalable vectors, so return Invalid.
    if (isa<ScalableVectorType>(Ty))
      return InstructionCost::getInvalid();

    // Else, assume that we need to scalarize this op.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
      InstructionCost Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      SmallVector<Type *> Tys(Args.size(), Ty);
      return getScalarizationOverhead(VTy, Args, Tys) +
             VTy->getNumElements() * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }
  TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind,
                                              ArrayRef<int> Mask) const {
    int Limit = Mask.size() * 2;
    if (Mask.empty() ||
        // Extra check required by isSingleSourceMaskImpl function (called by
        // ShuffleVectorInst::isSingleSourceMask).
        any_of(Mask, [Limit](int I) { return I >= Limit; }))
      return Kind;
    switch (Kind) {
    case TTI::SK_PermuteSingleSrc:
      if (ShuffleVectorInst::isReverseMask(Mask))
        return TTI::SK_Reverse;
      if (ShuffleVectorInst::isZeroEltSplatMask(Mask))
        return TTI::SK_Broadcast;
      break;
    case TTI::SK_PermuteTwoSrc:
      if (ShuffleVectorInst::isSelectMask(Mask))
        return TTI::SK_Select;
      if (ShuffleVectorInst::isTransposeMask(Mask))
        return TTI::SK_Transpose;
      break;
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Broadcast:
    case TTI::SK_Transpose:
    case TTI::SK_InsertSubvector:
    case TTI::SK_ExtractSubvector:
    case TTI::SK_Splice:
      break;
    }
    return Kind;
  }

  InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
                                 ArrayRef<int> Mask, int Index,
                                 VectorType *SubTp,
                                 ArrayRef<const Value *> Args = None) {

    switch (improveShuffleKindFromMask(Kind, Mask)) {
    case TTI::SK_Broadcast:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getBroadcastShuffleOverhead(FVT);
      return InstructionCost::getInvalid();
    case TTI::SK_Select:
    case TTI::SK_Splice:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc:
      if (auto *FVT = dyn_cast<FixedVectorType>(Tp))
        return getPermuteShuffleOverhead(FVT);
      return InstructionCost::getInvalid();
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, Index,
                                        cast<FixedVectorType>(SubTp));
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }

  InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                   TTI::CastContextHint CCH,
                                   TTI::TargetCostKind CostKind,
                                   const Instruction *I = nullptr) {
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<InstructionCost, MVT> SrcLT =
        TLI->getTypeLegalizationCost(DL, Src);
    std::pair<InstructionCost, MVT> DstLT =
        TLI->getTypeLegalizationCost(DL, Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::BitCast:
      // Bitcast between types that are legalized to the same type are free and
      // assume int to/from ptr of the same size is also free.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;

      // If this is a zext/sext of a load, return 0 if the corresponding
      // extending load exists on target and the result type is legal.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
          ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
        if (DstLT.first == SrcLT.first &&
            TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
          return 0;
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are legalizing by splitting, query the concrete TTI for the cost
      // of casting the original vector twice. We also need to factor in the
      // cost of the split itself. Count that as 1, to be consistent with
      // TLI->getTypeLegalizationCost().
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() &&
          DstVTy->getElementCount().isVector()) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        InstructionCost SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // Scalarization cost is Invalid, can't assume any num elements.
      if (isa<ScalableVectorType>(DstVTy))
        return InstructionCost::getInvalid();

      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      InstructionCost Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcast between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
    }

    llvm_unreachable("Unhandled cast");
  }

  InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                           VectorType *VecTy, unsigned Index) {
    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       Index) +
           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                     TTI::CastContextHint::None,
                                     TTI::TCK_RecipThroughput);
  }

  InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
                                 const Instruction *I = nullptr) {
    return BaseT::getCFInstrCost(Opcode, CostKind, I);
  }

  InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                     CmpInst::Predicate VecPred,
                                     TTI::TargetCostKind CostKind,
                                     const Instruction *I = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle other cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       I);

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<InstructionCost, MVT> LT =
        TLI->getTypeLegalizationCost(DL, ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the cast is scalarized.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      InstructionCost Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
                                     unsigned Index) {
    std::pair<InstructionCost, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());

    return LT.first;
  }

  InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
                                            int VF,
                                            const APInt &DemandedDstElts,
                                            TTI::TargetCostKind CostKind) {
    assert(DemandedDstElts.getBitWidth() == (unsigned)VF * ReplicationFactor &&
           "Unexpected size of DemandedDstElts.");

    InstructionCost Cost;

    auto *SrcVT = FixedVectorType::get(EltTy, VF);
    auto *ReplicatedVT = FixedVectorType::get(EltTy, VF * ReplicationFactor);

    // The Mask shuffling cost is to extract all the elements of the Mask
    // and insert each of them Factor times into the wide vector:
    //
    // E.g. an interleaved group with factor 3:
    //    %mask = icmp ult <8 x i32> %vec1, %vec2
    //    %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
    // The cost is estimated as extract all mask elements from the <8xi1> mask
    // vector and insert them factor times into the <24xi1> shuffled mask
    // vector.
    APInt DemandedSrcElts = APIntOps::ScaleBitMask(DemandedDstElts, VF);
    Cost += thisT()->getScalarizationOverhead(SrcVT, DemandedSrcElts,
                                              /*Insert*/ false,
                                              /*Extract*/ true);
    Cost +=
        thisT()->getScalarizationOverhead(ReplicatedVT, DemandedDstElts,
                                          /*Insert*/ true, /*Extract*/ false);

    return Cost;
  }

  InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src,
                                  MaybeAlign Alignment, unsigned AddressSpace,
                                  TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) {
    assert(!Src->isVoidTy() && "Invalid type");
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    std::pair<InstructionCost, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Src);

    // Assume that all loads of legal types cost 1.
    InstructionCost Cost = LT.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    if (Src->isVectorTy() &&
        // In practice it's not currently possible to have a change in lane
        // length for extending loads or truncating stores so both types should
        // have the same scalable property.
        TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(),
                            LT.second.getSizeInBits())) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, then this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src);
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(cast<VectorType>(Src),
                                         Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }

  InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy,
                                        Align Alignment, unsigned AddressSpace,
                                        TTI::TargetCostKind CostKind) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false,
                                       CostKind);
  }

  InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                         const Value *Ptr, bool VariableMask,
                                         Align Alignment,
                                         TTI::TargetCostKind CostKind,
                                         const Instruction *I = nullptr) {
    return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask,
                                       true, CostKind);
  }

  InstructionCost getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {
    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);

    // Firstly, the cost of the load/store operation.
    InstructionCost Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalize the vector type, and get the legalized and unlegalized type
    // sizes.
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

    // Scale the cost of the memory operation by the fraction of legalized
    // instructions that will actually be used. We shouldn't account for the
    // cost of dead instructions since they will be removed.
    //
    // E.g., An interleaved load of factor 8:
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
    //       %v0 = shufflevector %vec, undef, <0, 8>
    //
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
    // type). The other loads are unused.
    //
    // TODO: Note that legalization can turn masked loads/stores into unmasked
    // (legalized) loads/stores. This can be reflected in the cost.
    if (Cost.isValid() && VecTySize > VecTyLTSize) {
      // The number of loads of a legal type it will take to represent a load
      // of the unlegalized vector type.
      unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize);

      // The number of elements of the unlegalized type that correspond to a
      // single legal instruction.
      unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts);

      // Determine which legal instructions will be used.
      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

      // Scale the cost of the load by the fraction of legal instructions that
      // will be used.
      Cost = divideCeil(UsedInsts.count() * Cost.getValue().getValue(),
                        NumLegalInsts);
    }

    // Then add the cost of the interleave operation.
    assert(Indices.size() <= Factor &&
           "Interleaved memory op has too many members");

    const APInt DemandedAllSubElts = APInt::getAllOnes(NumSubElts);
    const APInt DemandedAllResultElts = APInt::getAllOnes(NumElts);

    APInt DemandedLoadStoreElts = APInt::getZero(NumElts);
    for (unsigned Index : Indices) {
      assert(Index < Factor && "Invalid index for interleaved memory op");
      for (unsigned Elm = 0; Elm < NumSubElts; Elm++)
        DemandedLoadStoreElts.setBit(Index + Elm * Factor);
    }

    if (Opcode == Instruction::Load) {
      // The interleave cost is similar to extracting the sub-vectors' elements
      // from the wide vector and inserting them into the sub-vectors.
      //
      // E.g. An interleaved load of factor 2 (with one member of index 0):
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>         ; Index 0
      // The cost is estimated as extract elements at 0, 2, 4, 6 from the
      // <8 x i32> vector and insert them into a <4 x i32> vector.
      InstructionCost InsSubCost =
          thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
                                            /*Insert*/ true, /*Extract*/ false);
      Cost += Indices.size() * InsSubCost;
      Cost +=
          thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                            /*Insert*/ false, /*Extract*/ true);
    } else {
      // The interleave cost is to extract elements from the sub-vectors and
      // insert them into the wide vector.
      //
      // E.g. An interleaved store of factor 3 with 2 members at indices 0,1:
      // (using VF=4):
      //    %v0_v1 = shuffle %v0, %v1, <0,4,undef,1,5,undef,2,6,undef,3,7,undef>
      //    %gaps.mask = <true, true, false, true, true, false,
      //                  true, true, false, true, true, false>
      //    call llvm.masked.store <12 x i32> %v0_v1, <12 x i32>* %ptr,
      //                           i32 Align, <12 x i1> %gaps.mask
      // The cost is estimated as extract all elements (of actual members,
      // excluding gaps) from both <4 x i32> vectors and insert into the <12 x
      // i32> vector.
      InstructionCost ExtSubCost =
          thisT()->getScalarizationOverhead(SubVT, DemandedAllSubElts,
                                            /*Insert*/ false, /*Extract*/ true);
      Cost += ExtSubCost * Indices.size();
      Cost += thisT()->getScalarizationOverhead(VT, DemandedLoadStoreElts,
                                                /*Insert*/ true,
                                                /*Extract*/ false);
    }

    if (!UseMaskForCond)
      return Cost;

    Type *I8Type = Type::getInt8Ty(VT->getContext());

    Cost += thisT()->getReplicationShuffleCost(
        I8Type, Factor, NumSubElts,
        UseMaskForGaps ? DemandedLoadStoreElts : DemandedAllResultElts,
        CostKind);

    // The Gaps mask is invariant and created outside the loop, therefore the
    // cost of creating it is not accounted for here. However if we have both
    // a MaskForGaps and some other mask that guards the execution of the
    // memory access, we need to account for the cost of And-ing the two masks
    // inside the loop.
    if (UseMaskForGaps) {
      auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                              CostKind);
    }

    return Cost;
  }

  /// Get intrinsic cost based on arguments.
  InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                        TTI::TargetCostKind CostKind) {
    // Check for generically free intrinsics.
    if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
      return 0;

    // Assume that target intrinsics are cheap.
    Intrinsic::ID IID = ICA.getID();
    if (Function::isTargetIntrinsic(IID))
      return TargetTransformInfo::TCC_Basic;

    if (ICA.isTypeBasedOnly())
      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

    Type *RetTy = ICA.getReturnType();

    ElementCount RetVF =
        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                             : ElementCount::getFixed(1));
    const IntrinsicInst *I = ICA.getInst();
    ArrayRef<const Value *> Args = ICA.getArgs();
    FastMathFlags FMF = ICA.getFlags();
    switch (IID) {
    default:
      break;

    case Intrinsic::cttz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::ctlz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());

    case Intrinsic::masked_scatter: {
      const Value *Mask = Args[3];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Store,
                                             ICA.getArgTypes()[0], Args[1],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_stepvector: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      // The cost of materialising a constant integer vector.
      return TargetTransformInfo::TCC_Basic;
    }
    case Intrinsic::experimental_vector_extract: {
      // FIXME: Handle case where a scalable vector is extracted from a scalable
      // vector
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::experimental_vector_insert: {
      // FIXME: Handle case where a scalable vector is inserted into a scalable
      // vector
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), None,
          Index, cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::experimental_vector_reverse: {
      return thisT()->getShuffleCost(TTI::SK_Reverse,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     0, cast<VectorType>(RetTy));
    }
    case Intrinsic::experimental_vector_splice: {
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_Splice,
                                     cast<VectorType>(Args[0]->getType()), None,
                                     Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
                                                              : TTI::OP_None;
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
      InstructionCost Cost = 0;
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
      // Non-constant shift amounts require a modulo.
      if (OpKindZ != TTI::OK_UniformConstantValue &&
          OpKindZ != TTI::OK_NonUniformConstantValue)
        Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
                                                CostKind, OpKindZ, OpKindBW,
                                                OpPropsZ, OpPropsBW);
      // For non-rotates (X != Y) we must add shift-by-zero handling costs.
      if (X != Y) {
        Type *CondTy = RetTy->getWithNewBitWidth(1);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
                                        CmpInst::ICMP_EQ, CostKind);
        Cost +=
            thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
                                        CmpInst::ICMP_EQ, CostKind);
      }
      return Cost;
    }
    case Intrinsic::get_active_lane_mask: {
      EVT ResVT = getTLI()->getValueType(DL, RetTy, true);
      EVT ArgType = getTLI()->getValueType(DL, ICA.getArgTypes()[0], true);

      // If we're not expanding the intrinsic then we assume this is cheap
      // to implement.
      if (!getTLI()->shouldExpandGetActiveLaneMask(ResVT, ArgType)) {
        std::pair<InstructionCost, MVT> LT =
            getTLI()->getTypeLegalizationCost(DL, RetTy);
        return LT.first;
      }

      // Create the expanded types that will be used to calculate the uadd_sat
      // operation.
      Type *ExpRetTy = VectorType::get(
          ICA.getArgTypes()[0], cast<VectorType>(RetTy)->getElementCount());
      IntrinsicCostAttributes Attrs(Intrinsic::uadd_sat, ExpRetTy, {}, FMF);
      InstructionCost Cost =
          thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
      Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, ExpRetTy, RetTy,
                                          CmpInst::ICMP_ULT, CostKind);
      return Cost;
    }
    }

    // Assume that we need to scalarize this intrinsic.
    // Compute the scalarization overhead based on Args for a vector
    // intrinsic.
    InstructionCost ScalarizationCost = InstructionCost::getInvalid();
    if (RetVF.isVector() && !RetVF.isScalable()) {
      ScalarizationCost = 0;
      if (!RetTy->isVoidTy())
        ScalarizationCost +=
            getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
      ScalarizationCost +=
          getOperandsScalarizationOverhead(Args, ICA.getArgTypes());
    }

    IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I,
                                  ScalarizationCost);
    return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
  }

  /// Get intrinsic cost based on argument types.
  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
  /// cost of scalarizing the arguments and the return value will be computed
  /// based on types.
  InstructionCost
  getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) {
    Intrinsic::ID IID = ICA.getID();
    Type *RetTy = ICA.getReturnType();
    const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
    FastMathFlags FMF = ICA.getFlags();
    InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost();
    bool SkipScalarizationCost = ICA.skipScalarizationCost();

    VectorType *VecOpTy = nullptr;
    if (!Tys.empty()) {
      // The vector reduction operand is operand 0 except for fadd/fmul.
      // Their operand 0 is a scalar start value, so the vector op is operand 1.
      unsigned VecTyIndex = 0;
      if (IID == Intrinsic::vector_reduce_fadd ||
          IID == Intrinsic::vector_reduce_fmul)
        VecTyIndex = 1;
      assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
      VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
    }

    // Library call cost - other than size, make it expensive.
    unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
    SmallVector<unsigned, 2> ISDs;
    switch (IID) {
    default: {
      // Scalable vectors cannot be scalarized, so return Invalid.
      if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
            return isa<ScalableVectorType>(Ty);
          }))
        return InstructionCost::getInvalid();

      // Assume that we need to scalarize this intrinsic.
      InstructionCost ScalarizationCost =
          SkipScalarizationCost ? ScalarizationCostPassed : 0;
      unsigned ScalarCalls = 1;
      Type *ScalarRetTy = RetTy;
      if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
        if (!SkipScalarizationCost)
          ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
        ScalarCalls = std::max(ScalarCalls,
                               cast<FixedVectorType>(RetVTy)->getNumElements());
        ScalarRetTy = RetTy->getScalarType();
      }
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (auto *VTy = dyn_cast<VectorType>(Ty)) {
          if (!SkipScalarizationCost)
            ScalarizationCost += getScalarizationOverhead(VTy, false, true);
          ScalarCalls = std::max(ScalarCalls,
                                 cast<FixedVectorType>(VTy)->getNumElements());
          Ty = Ty->getScalarType();
        }
        ScalarTys.push_back(Ty);
      }
      if (ScalarCalls == 1)
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.

      IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
      InstructionCost ScalarCost =
          thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);

      return ScalarCalls * ScalarCost + ScalarizationCost;
    }
    // Look for intrinsics that can be lowered directly or turned into a scalar
    // intrinsic call.
    case Intrinsic::sqrt:
      ISDs.push_back(ISD::FSQRT);
      break;
    case Intrinsic::sin:
      ISDs.push_back(ISD::FSIN);
      break;
    case Intrinsic::cos:
      ISDs.push_back(ISD::FCOS);
      break;
    case Intrinsic::exp:
      ISDs.push_back(ISD::FEXP);
      break;
    case Intrinsic::exp2:
      ISDs.push_back(ISD::FEXP2);
      break;
    case Intrinsic::log:
      ISDs.push_back(ISD::FLOG);
      break;
    case Intrinsic::log10:
      ISDs.push_back(ISD::FLOG10);
      break;
    case Intrinsic::log2:
      ISDs.push_back(ISD::FLOG2);
      break;
    case Intrinsic::fabs:
      ISDs.push_back(ISD::FABS);
      break;
    case Intrinsic::canonicalize:
      ISDs.push_back(ISD::FCANONICALIZE);
      break;
    case Intrinsic::minnum:
      ISDs.push_back(ISD::FMINNUM);
      break;
    case Intrinsic::maxnum:
      ISDs.push_back(ISD::FMAXNUM);
      break;
    case Intrinsic::minimum:
      ISDs.push_back(ISD::FMINIMUM);
      break;
    case Intrinsic::maximum:
      ISDs.push_back(ISD::FMAXIMUM);
      break;
    case Intrinsic::copysign:
      ISDs.push_back(ISD::FCOPYSIGN);
      break;
    case Intrinsic::floor:
      ISDs.push_back(ISD::FFLOOR);
      break;
    case Intrinsic::ceil:
      ISDs.push_back(ISD::FCEIL);
      break;
    case Intrinsic::trunc:
      ISDs.push_back(ISD::FTRUNC);
      break;
    case Intrinsic::nearbyint:
      ISDs.push_back(ISD::FNEARBYINT);
      break;
    case Intrinsic::rint:
      ISDs.push_back(ISD::FRINT);
      break;
    case Intrinsic::round:
      ISDs.push_back(ISD::FROUND);
      break;
    case Intrinsic::roundeven:
      ISDs.push_back(ISD::FROUNDEVEN);
      break;
    case Intrinsic::pow:
      ISDs.push_back(ISD::FPOW);
      break;
    case Intrinsic::fma:
      ISDs.push_back(ISD::FMA);
      break;
    case Intrinsic::fmuladd:
      ISDs.push_back(ISD::FMA);
      break;
    case Intrinsic::experimental_constrained_fmuladd:
      ISDs.push_back(ISD::STRICT_FMA);
      break;
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::sideeffect:
    case Intrinsic::pseudoprobe:
    case Intrinsic::arithmetic_fence:
      return 0;
    case Intrinsic::masked_store: {
      Type *Ty = Tys[0];
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::masked_load: {
      Type *Ty = RetTy;
      Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
      return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
                                            CostKind);
    }
    case Intrinsic::vector_reduce_add:
      return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_mul:
      return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_and:
      return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_or:
      return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, None,
                                                 CostKind);
    case Intrinsic::vector_reduce_xor:
      return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
                                                 None, CostKind);
    case Intrinsic::vector_reduce_fadd:
      return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
                                                 FMF, CostKind);
    case Intrinsic::vector_reduce_fmul:
      return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
                                                 FMF, CostKind);
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
1773  return thisT()->getMinMaxReductionCost(
1774  VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1775  /*IsUnsigned=*/false, CostKind);
1776  case Intrinsic::vector_reduce_umax:
1777  case Intrinsic::vector_reduce_umin:
1778  return thisT()->getMinMaxReductionCost(
1779  VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1780  /*IsUnsigned=*/true, CostKind);
1781  case Intrinsic::abs: {
1782  // abs(X) = select(icmp(X,0),X,sub(0,X))
1783  Type *CondTy = RetTy->getWithNewBitWidth(1);
1784  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1785  InstructionCost Cost = 0;
1786  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1787  Pred, CostKind);
1788  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1789  Pred, CostKind);
1790  // TODO: Should we add an OperandValueProperties::OP_Zero property?
1791  Cost += thisT()->getArithmeticInstrCost(
1792  BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
1793  return Cost;
1794  }
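// For illustration, the expansion costed above corresponds to IR along
// these lines (a sketch with placeholder value names; actual lowering is
// target-dependent):
//   %neg = sub i32 0, %x
//   %cmp = icmp sgt i32 %x, -1
//   %abs = select i1 %cmp, i32 %x, i32 %neg
// i.e. one icmp, one select and one sub, matching the three cost terms.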
1795  case Intrinsic::smax:
1796  case Intrinsic::smin:
1797  case Intrinsic::umax:
1798  case Intrinsic::umin: {
1799  // minmax(X,Y) = select(icmp(X,Y),X,Y)
1800  Type *CondTy = RetTy->getWithNewBitWidth(1);
1801  bool IsUnsigned = IID == Intrinsic::umax || IID == Intrinsic::umin;
1802  CmpInst::Predicate Pred =
1803  IsUnsigned ? CmpInst::ICMP_UGT : CmpInst::ICMP_SGT;
1804  InstructionCost Cost = 0;
1805  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1806  Pred, CostKind);
1807  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1808  Pred, CostKind);
1809  return Cost;
1810  }
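// For illustration, smax(%x, %y) expands as sketched in the comment above
// (placeholder value names):
//   %cmp = icmp sgt i32 %x, %y
//   %max = select i1 %cmp, i32 %x, i32 %y
// hence one icmp plus one select; the unsigned variants use icmp ugt.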
1811  case Intrinsic::sadd_sat:
1812  case Intrinsic::ssub_sat: {
1813  Type *CondTy = RetTy->getWithNewBitWidth(1);
1814 
1815  Type *OpTy = StructType::create({RetTy, CondTy});
1816  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1817  ? Intrinsic::sadd_with_overflow
1818  : Intrinsic::ssub_with_overflow;
1819  CmpInst::Predicate Pred = CmpInst::ICMP_SGT;
1820
1821  // SatMax -> Overflow && SumDiff < 0
1822  // SatMin -> Overflow && SumDiff >= 0
1823  InstructionCost Cost = 0;
1824  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1825  nullptr, ScalarizationCostPassed);
1826  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1827  Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1828  Pred, CostKind);
1829  Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1830  CondTy, Pred, CostKind);
1831  return Cost;
1832  }
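// For illustration, the modelled expansion of sadd.sat (a sketch with
// placeholder names; targets may lower it differently):
//   {%sum, %ovf} = sadd.with.overflow(%a, %b)
//   %sat = %sum < 0 ? SINT_MAX : SINT_MIN   ; icmp + select, per the
//   %res = %ovf ? %sat : %sum               ; SatMax/SatMin rules above
// i.e. one overflow intrinsic, one icmp and two selects, matching the
// accumulated cost terms.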
1833  case Intrinsic::uadd_sat:
1834  case Intrinsic::usub_sat: {
1835  Type *CondTy = RetTy->getWithNewBitWidth(1);
1836 
1837  Type *OpTy = StructType::create({RetTy, CondTy});
1838  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1839  ? Intrinsic::uadd_with_overflow
1840  : Intrinsic::usub_with_overflow;
1841 
1842  InstructionCost Cost = 0;
1843  IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1844  nullptr, ScalarizationCostPassed);
1845  Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1846  Cost +=
1847  thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1848  CmpInst::BAD_ICMP_PREDICATE, CostKind);
1849  return Cost;
1850  }
1851  case Intrinsic::smul_fix:
1852  case Intrinsic::umul_fix: {
1853  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1854  Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1855 
1856  unsigned ExtOp =
1857  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1858  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1859
1860  InstructionCost Cost = 0;
1861  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
1862  Cost +=
1863  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1864  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1865  CCH, CostKind);
1866  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1867  CostKind, TTI::OK_AnyValue,
1868  TTI::OK_UniformConstantValue);
1869  Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1870  TTI::OK_AnyValue,
1871  TTI::OK_UniformConstantValue);
1872  Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
1873  return Cost;
1874  }
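// For illustration (a sketch): the cost above models widening both
// operands (2 x ext), one multiply in the doubled width, splitting the
// product back into halves (2 x trunc), and recombining them around the
// fixed-point scale (lshr + shl + or).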
1875  case Intrinsic::sadd_with_overflow:
1876  case Intrinsic::ssub_with_overflow: {
1877  Type *SumTy = RetTy->getContainedType(0);
1878  Type *OverflowTy = RetTy->getContainedType(1);
1879  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1880  ? BinaryOperator::Add
1881  : BinaryOperator::Sub;
1882 
1883  // Add:
1884  // Overflow -> (Result < LHS) ^ (RHS < 0)
1885  // Sub:
1886  // Overflow -> (Result < LHS) ^ (RHS > 0)
1887  InstructionCost Cost = 0;
1888  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1889  Cost += 2 * thisT()->getCmpSelInstrCost(
1890  Instruction::ICmp, SumTy, OverflowTy,
1891  CmpInst::ICMP_SGT, CostKind);
1892  Cost += thisT()->getArithmeticInstrCost(BinaryOperator::Xor, OverflowTy,
1893  CostKind);
1894  return Cost;
1895  }
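// For illustration, the signed overflow check costed above (a sketch with
// placeholder names, shown for the add case):
//   %sum = add i32 %lhs, %rhs
//   %a = icmp slt i32 %sum, %lhs   ; Result < LHS
//   %b = icmp slt i32 %rhs, 0      ; RHS < 0
//   %ovf = xor i1 %a, %b
// i.e. one add/sub, two compares (hence the factor of 2) and one xor.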
1896  case Intrinsic::uadd_with_overflow:
1897  case Intrinsic::usub_with_overflow: {
1898  Type *SumTy = RetTy->getContainedType(0);
1899  Type *OverflowTy = RetTy->getContainedType(1);
1900  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1901  ? BinaryOperator::Add
1902  : BinaryOperator::Sub;
1903  CmpInst::Predicate Pred = IID == Intrinsic::uadd_with_overflow
1904  ? CmpInst::ICMP_ULT
1905  : CmpInst::ICMP_UGT;
1906
1907  InstructionCost Cost = 0;
1908  Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1909  Cost +=
1910  thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
1911  Pred, CostKind);
1912  return Cost;
1913  }
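// For illustration, the unsigned case needs no xor (a sketch with
// placeholder names): for uadd.with.overflow,
//   %sum = add i32 %lhs, %rhs
//   %ovf = icmp ult i32 %sum, %lhs   ; wrapped iff Result < LHS
// and usub compares the other way, hence one add/sub plus one icmp.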
1914  case Intrinsic::smul_with_overflow:
1915  case Intrinsic::umul_with_overflow: {
1916  Type *MulTy = RetTy->getContainedType(0);
1917  Type *OverflowTy = RetTy->getContainedType(1);
1918  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1919  Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1920  bool IsSigned = IID == Intrinsic::smul_with_overflow;
1921 
1922  unsigned ExtOp = IsSigned ? Instruction::SExt : Instruction::ZExt;
1923  TTI::CastContextHint CCH = TTI::CastContextHint::None;
1924
1925  InstructionCost Cost = 0;
1926  Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
1927  Cost +=
1928  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1929  Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
1930  CCH, CostKind);
1931  Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, ExtTy,
1932  CostKind, TTI::OK_AnyValue,
1933  TTI::OK_UniformConstantValue);
1934
1935  if (IsSigned)
1936  Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
1937  CostKind, TTI::OK_AnyValue,
1938  TTI::OK_UniformConstantValue);
1939
1940  Cost += thisT()->getCmpSelInstrCost(
1941  BinaryOperator::ICmp, MulTy, OverflowTy, CmpInst::ICMP_NE, CostKind);
1942  return Cost;
1943  }
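// For illustration (a sketch with placeholder names): the product is
// formed at twice the width, split back, and the discarded upper half is
// checked against the sign of the lower half:
//   %wide = mul i64 (ext %a), (ext %b)      ; 2 x ext + wide mul
//   %lo = trunc i64 %wide to i32            ; 2 x trunc (low/high halves)
//   %hi = trunc i64 (lshr %wide, 32) to i32
//   %ovf = icmp ne i32 %hi, (ashr %lo, 31)  ; ashr for the signed case only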
1944  case Intrinsic::fptosi_sat:
1945  case Intrinsic::fptoui_sat: {
1946  if (Tys.empty())
1947  break;
1948  Type *FromTy = Tys[0];
1949  bool IsSigned = IID == Intrinsic::fptosi_sat;
1950 
1951  InstructionCost Cost = 0;
1952  IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FromTy,
1953  {FromTy, FromTy});
1954  Cost += thisT()->getIntrinsicInstrCost(Attrs1, CostKind);
1955  IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FromTy,
1956  {FromTy, FromTy});
1957  Cost += thisT()->getIntrinsicInstrCost(Attrs2, CostKind);
1958  Cost += thisT()->getCastInstrCost(
1959  IsSigned ? Instruction::FPToSI : Instruction::FPToUI, RetTy, FromTy,
1960  TTI::CastContextHint::None, CostKind);
1961  if (IsSigned) {
1962  Type *CondTy = RetTy->getWithNewBitWidth(1);
1963  Cost += thisT()->getCmpSelInstrCost(
1964  BinaryOperator::FCmp, FromTy, CondTy, CmpInst::FCMP_UNO, CostKind);
1965  Cost += thisT()->getCmpSelInstrCost(
1966  BinaryOperator::Select, RetTy, CondTy, CmpInst::FCMP_UNO, CostKind);
1967  }
1968  return Cost;
1969  }
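// For illustration (a sketch): the saturating conversion above is modelled
// as clamping the input into the representable range with minnum/maxnum,
// converting, and, for the signed case, selecting zero when the input is
// NaN (the fcmp uno + select terms).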
1970  case Intrinsic::ctpop:
1971  ISDs.push_back(ISD::CTPOP);
1972  // In case of legalization use TCC_Expensive. This is cheaper than a
1973  // library call but still not a cheap instruction.
1974  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1975  break;
1976  case Intrinsic::ctlz:
1977  ISDs.push_back(ISD::CTLZ);
1978  break;
1979  case Intrinsic::cttz:
1980  ISDs.push_back(ISD::CTTZ);
1981  break;
1982  case Intrinsic::bswap:
1983  ISDs.push_back(ISD::BSWAP);
1984  break;
1985  case Intrinsic::bitreverse:
1986  ISDs.push_back(ISD::BITREVERSE);
1987  break;
1988  }
1989 
1990  const TargetLoweringBase *TLI = getTLI();
1991  std::pair<InstructionCost, MVT> LT =
1992  TLI->getTypeLegalizationCost(DL, RetTy);
1993
1994  SmallVector<InstructionCost, 2> LegalCost;
1995  SmallVector<InstructionCost, 2> CustomCost;
1996  for (unsigned ISD : ISDs) {
1997  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1998  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1999  TLI->isFAbsFree(LT.second)) {
2000  return 0;
2001  }
2002 
2003  // The operation is legal. Assume it costs 1.
2004  // If the type is split to multiple registers, assume that there is some
2005  // overhead to this.
2006  // TODO: Once we have extract/insert subvector cost we need to use them.
2007  if (LT.first > 1)
2008  LegalCost.push_back(LT.first * 2);
2009  else
2010  LegalCost.push_back(LT.first * 1);
2011  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
2012  // If the operation is custom lowered then assume
2013  // that the code is twice as expensive.
2014  CustomCost.push_back(LT.first * 2);
2015  }
2016  }
2017 
2018  auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
2019  if (MinLegalCostI != LegalCost.end())
2020  return *MinLegalCostI;
2021 
2022  auto MinCustomCostI =
2023  std::min_element(CustomCost.begin(), CustomCost.end());
2024  if (MinCustomCostI != CustomCost.end())
2025  return *MinCustomCostI;
2026 
2027  // If we can't lower fmuladd into an FMA estimate the cost as a floating
2028  // point mul followed by an add.
2029  if (IID == Intrinsic::fmuladd)
2030  return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
2031  CostKind) +
2032  thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
2033  CostKind);
2034  if (IID == Intrinsic::experimental_constrained_fmuladd) {
2035  IntrinsicCostAttributes FMulAttrs(
2036  Intrinsic::experimental_constrained_fmul, RetTy, Tys);
2037  IntrinsicCostAttributes FAddAttrs(
2038  Intrinsic::experimental_constrained_fadd, RetTy, Tys);
2039  return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
2040  thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
2041  }
2042 
2043  // Else, assume that we need to scalarize this intrinsic. For math builtins
2044  // this will emit a costly libcall, adding call overhead and spills. Make it
2045  // very expensive.
2046  if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
2047  // Scalable vectors cannot be scalarized, so return Invalid.
2048  if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) {
2049  return isa<ScalableVectorType>(Ty);
2050  }))
2051  return InstructionCost::getInvalid();
2052 
2053  InstructionCost ScalarizationCost =
2054  SkipScalarizationCost ? ScalarizationCostPassed
2055  : getScalarizationOverhead(RetVTy, true, false);
2056 
2057  unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
2058  SmallVector<Type *, 4> ScalarTys;
2059  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
2060  Type *Ty = Tys[i];
2061  if (Ty->isVectorTy())
2062  Ty = Ty->getScalarType();
2063  ScalarTys.push_back(Ty);
2064  }
2065  IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
2066  InstructionCost ScalarCost =
2067  thisT()->getIntrinsicInstrCost(Attrs, CostKind);
2068  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
2069  if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
2070  if (!ICA.skipScalarizationCost())
2071  ScalarizationCost += getScalarizationOverhead(VTy, false, true);
2072  ScalarCalls = std::max(ScalarCalls,
2073  cast<FixedVectorType>(VTy)->getNumElements());
2074  }
2075  }
2076  return ScalarCalls * ScalarCost + ScalarizationCost;
2077  }
2078 
2079  // This is going to be turned into a library call, make it expensive.
2080  return SingleCallCost;
2081  }
2082 
2083  /// Compute the cost of the given call instruction.
2084  ///
2085  /// Compute the cost of calling function F with return type RetTy and
2086  /// argument types Tys. F might be nullptr, in which case the cost of an
2087  /// arbitrary call with the specified signature will be returned.
2088  /// This is used, for instance, when we estimate the cost of calling a
2089  /// vector counterpart of the given function.
2090  /// \param F Called function, might be nullptr.
2091  /// \param RetTy Return value type.
2092  /// \param Tys Argument types.
2093  /// \returns The cost of Call instruction.
2094  InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2095  ArrayRef<Type *> Tys,
2096  TTI::TargetCostKind CostKind) {
2097  return 10;
2098  }
2099 
2100  unsigned getNumberOfParts(Type *Tp) {
2101  std::pair<InstructionCost, MVT> LT =
2102  getTLI()->getTypeLegalizationCost(DL, Tp);
2103  return LT.first.isValid() ? *LT.first.getValue() : 0;
2104  }
2105
2106  InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *,
2107  const SCEV *) {
2108  return 0;
2109  }
2110 
2111  /// Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
2112  /// We're assuming that reduction operations are performed the following way:
2113  ///
2114  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
2115  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
2116  /// \----------------v-------------/ \----------v------------/
2117  /// n/2 elements n/2 elements
2118  /// %red1 = op <n x t> %val, <n x t> val1
2119  /// After this operation we have a vector %red1 where only the first n/2
2120  /// elements are meaningful, the second n/2 elements are undefined and can be
2121  /// dropped. All other operations are actually working with the vector of
2122  /// length n/2, not n, though the real vector length is still n.
2123  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
2124  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
2125  /// \----------------v-------------/ \----------v------------/
2126  /// n/4 elements 3*n/4 elements
2127  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
2128  /// length n/2, the resulting vector has length n/4 etc.
2129  ///
2130  /// The cost model should take into account that the actual length of the
2131  /// vector is reduced on each iteration.
2132  InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty,
2133  TTI::TargetCostKind CostKind) {
2134  Type *ScalarTy = Ty->getElementType();
2135  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2136  if ((Opcode == Instruction::Or || Opcode == Instruction::And) &&
2137  ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) &&
2138  NumVecElts >= 2) {
2139  // Or reduction for i1 is represented as:
2140  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2141  // %res = cmp ne iReduxWidth %val, 0
2142  // And reduction for i1 is represented as:
2143  // %val = bitcast <ReduxWidth x i1> to iReduxWidth
2144  // %res = cmp eq iReduxWidth %val, 11...1 (all ones)
2145  Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts);
2146  return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty,
2147  TTI::CastContextHint::None, CostKind) +
2148  thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy,
2149  CmpInst::makeCmpResultType(ValTy),
2150  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2151  }
2152  unsigned NumReduxLevels = Log2_32(NumVecElts);
2153  InstructionCost ArithCost = 0;
2154  InstructionCost ShuffleCost = 0;
2155  std::pair<InstructionCost, MVT> LT =
2156  thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
2157  unsigned LongVectorCount = 0;
2158  unsigned MVTLen =
2159  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2160  while (NumVecElts > MVTLen) {
2161  NumVecElts /= 2;
2162  VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2163  ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
2164  NumVecElts, SubTy);
2165  ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
2166  Ty = SubTy;
2167  ++LongVectorCount;
2168  }
2169 
2170  NumReduxLevels -= LongVectorCount;
2171 
2172  // The minimal length of the vector is limited by the real length of vector
2173  // operations performed on the current platform. That's why several final
2174  // reduction operations are performed on the vectors with the same
2175  // architecture-dependent length.
2176 
2177  // By default reductions need one shuffle per reduction level.
2178  ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
2179  TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
2180  ArithCost +=
2181  NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty, CostKind);
2182  return ShuffleCost + ArithCost +
2183  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2184  }
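// Worked example (a sketch): for an add reduction of a legal <8 x i32>,
// Log2_32(8) = 3 levels give 3 shuffles + 3 vector adds, plus the final
// extractelement, each priced through the hooks above.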
2185 
2186  /// Try to calculate the cost of performing strict (in-order) reductions,
2187  /// which involves doing a sequence of floating point additions in lane
2188  /// order, starting with an initial value. For example, consider a scalar
2189  /// initial value 'InitVal' of type float and a vector of type <4 x float>:
2190  ///
2191  /// Vector = <float %v0, float %v1, float %v2, float %v3>
2192  ///
2193  /// %add1 = %InitVal + %v0
2194  /// %add2 = %add1 + %v1
2195  /// %add3 = %add2 + %v2
2196  /// %add4 = %add3 + %v3
2197  ///
2198  /// As a simple estimate we can say the cost of such a reduction is 4 times
2199  /// the cost of a scalar FP addition. We can only estimate the costs for
2200  /// fixed-width vectors here because for scalable vectors we do not know the
2201  /// runtime number of operations.
2202  InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty,
2203  TTI::TargetCostKind CostKind) {
2204  // Targets must implement a default value for the scalable case, since
2205  // we don't know how many lanes the vector has.
2206  if (isa<ScalableVectorType>(Ty))
2207  return InstructionCost::getInvalid();
2208 
2209  auto *VTy = cast<FixedVectorType>(Ty);
2210  InstructionCost ExtractCost =
2211  getScalarizationOverhead(VTy, /*Insert=*/false, /*Extract=*/true);
2212  InstructionCost ArithCost = thisT()->getArithmeticInstrCost(
2213  Opcode, VTy->getElementType(), CostKind);
2214  ArithCost *= VTy->getNumElements();
2215 
2216  return ExtractCost + ArithCost;
2217  }
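// Worked example (a sketch): for the <4 x float> case in the comment
// above, the estimate is the overhead of extracting the 4 lanes plus
// 4 times the scalar fadd cost, reflecting the serial chain of adds.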
2218
2219  InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2220  Optional<FastMathFlags> FMF,
2221  TTI::TargetCostKind CostKind) {
2222  if (TTI::requiresOrderedReduction(FMF))
2223  return getOrderedReductionCost(Opcode, Ty, CostKind);
2224  return getTreeReductionCost(Opcode, Ty, CostKind);
2225  }
2226 
2227  /// Try to calculate op costs for min/max reduction operations.
2228  /// \param CondTy Conditional type for the Select instruction.
2229  InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
2230  bool IsUnsigned,
2231  TTI::TargetCostKind CostKind) {
2232  Type *ScalarTy = Ty->getElementType();
2233  Type *ScalarCondTy = CondTy->getElementType();
2234  unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
2235  unsigned NumReduxLevels = Log2_32(NumVecElts);
2236  unsigned CmpOpcode;
2237  if (Ty->isFPOrFPVectorTy()) {
2238  CmpOpcode = Instruction::FCmp;
2239  } else {
2240  assert(Ty->isIntOrIntVectorTy() &&
2241  "expecting floating point or integer type for min/max reduction");
2242  CmpOpcode = Instruction::ICmp;
2243  }
2244  InstructionCost MinMaxCost = 0;
2245  InstructionCost ShuffleCost = 0;
2246  std::pair<InstructionCost, MVT> LT =
2247  thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
2248  unsigned LongVectorCount = 0;
2249  unsigned MVTLen =
2250  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
2251  while (NumVecElts > MVTLen) {
2252  NumVecElts /= 2;
2253  auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
2254  CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
2255 
2256  ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None,
2257  NumVecElts, SubTy);
2258  MinMaxCost +=
2259  thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
2260  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2261  thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
2262  CmpInst::BAD_ICMP_PREDICATE, CostKind);
2263  Ty = SubTy;
2264  ++LongVectorCount;
2265  }
2266 
2267  NumReduxLevels -= LongVectorCount;
2268 
2269  // The minimal length of the vector is limited by the real length of vector
2270  // operations performed on the current platform. That's why several final
2271  // reduction operations are performed on the vectors with the same
2272  // architecture-dependent length.
2273  ShuffleCost += NumReduxLevels * thisT()->getShuffleCost(
2274  TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty);
2275  MinMaxCost +=
2276  NumReduxLevels *
2277  (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
2278  CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2279  thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
2280  CmpInst::BAD_ICMP_PREDICATE, CostKind));
2281  // The last min/max should be in vector registers and we counted it above.
2282  // So just need a single extractelement.
2283  return ShuffleCost + MinMaxCost +
2284  thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
2285  }
2286 
2287  InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned,
2288  Type *ResTy, VectorType *Ty,
2289  TTI::TargetCostKind CostKind) {
2290  // Without any native support, this is equivalent to the cost of
2291  // vecreduce.add(ext) or if IsMLA vecreduce.add(mul(ext, ext))
2292  VectorType *ExtTy = VectorType::get(ResTy, Ty);
2293  InstructionCost RedCost = thisT()->getArithmeticReductionCost(
2294  Instruction::Add, ExtTy, None, CostKind);
2295  InstructionCost MulCost = 0;
2296  InstructionCost ExtCost = thisT()->getCastInstrCost(
2297  IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty,
2298  TTI::CastContextHint::None, CostKind);
2299  if (IsMLA) {
2300  MulCost =
2301  thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
2302  ExtCost *= 2;
2303  }
2304 
2305  return RedCost + MulCost + ExtCost;
2306  }
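// For illustration (a sketch with placeholder names): with IsMLA the
// pattern priced above is
//   vecreduce.add(mul(ext(%a), ext(%b)))   ; 2 x ext + mul + reduction
// and otherwise simply vecreduce.add(ext(%a)), i.e. one ext + reduction.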
2307 
2308  InstructionCost getVectorSplitCost() { return 1; }
2309
2310  /// @}
2311 };
2312 
2313 /// Concrete BasicTTIImpl that can be used if no further customization
2314 /// is needed.
2315 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
2316  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
2317
2318  friend class BasicTTIImplBase<BasicTTIImpl>;
2319
2320  const TargetSubtargetInfo *ST;
2321  const TargetLoweringBase *TLI;
2322 
2323  const TargetSubtargetInfo *getST() const { return ST; }
2324  const TargetLoweringBase *getTLI() const { return TLI; }
2325 
2326 public:
2327  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
2328 };
2329 
2330 } // end namespace llvm
2331 
2332 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
llvm::ShuffleVectorInst::isZeroEltSplatMask
static bool isZeroEltSplatMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses all elements with the same value as the first element of exa...
Definition: Instructions.cpp:2234
llvm::MCSubtargetInfo::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: MCSubtargetInfo.cpp:359
llvm::ISD::FROUNDEVEN
@ FROUNDEVEN
Definition: ISDOpcodes.h:922
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::BasicTTIImplBase::getVectorSplitCost
InstructionCost getVectorSplitCost()
Definition: BasicTTIImpl.h:2308
llvm::TargetLoweringBase::isTruncStoreLegal
bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const
Return true if the specified store with truncation is legal on this target.
Definition: TargetLowering.h:1309
llvm::BasicTTIImplBase::getFPOpCost
InstructionCost getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:498
ValueTypes.h
llvm::TargetTransformInfo::UnrollingPreferences::BEInsns
unsigned BEInsns
Definition: TargetTransformInfo.h:480
llvm::TargetTransformInfo::UnrollingPreferences::PartialOptSizeThreshold
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold,...
Definition: TargetTransformInfo.h:459
llvm::TargetTransformInfo::SK_Select
@ SK_Select
Selects elements from the corresponding lane of either source operand.
Definition: TargetTransformInfo.h:874
Attrs
Function Attrs
Definition: README_ALTIVEC.txt:215
llvm::TargetTransformInfo::UnrollingPreferences::Runtime
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
Definition: TargetTransformInfo.h:487
llvm::TargetTransformInfo::TargetCostKind
TargetCostKind
The kind of cost model.
Definition: TargetTransformInfo.h:210
llvm::MVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: MachineValueType.h:1076
llvm::BasicTTIImplBase::getOperandsScalarizationOverhead
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:734
llvm::TargetTransformInfoImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Definition: TargetTransformInfoImpl.h:157
llvm::TargetTransformInfo::TCC_Expensive
@ TCC_Expensive
The cost of a 'div' instruction on x86.
Definition: TargetTransformInfo.h:263
llvm::ISD::MemIndexedMode
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1353
llvm::BasicTTIImplBase::shouldBuildRelLookupTables
bool shouldBuildRelLookupTables() const
Definition: BasicTTIImpl.h:462
MathExtras.h
llvm::TargetTransformInfo::UnrollingPreferences::PartialThreshold
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
Definition: TargetTransformInfo.h:455
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:17
llvm::TargetLoweringBase
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
Definition: TargetLowering.h:191
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:236
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::BasicTTIImplBase::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:649
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:196
llvm::InstructionCost::getValue
Optional< CostType > getValue() const
This function is intended to be used as sparingly as possible, since the class provides the full rang...
Definition: InstructionCost.h:87
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::TargetTransformInfoImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI) const
Definition: TargetTransformInfoImpl.h:163
llvm::BasicTTIImplBase::getInliningThresholdMultiplier
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:508
llvm::BasicTTIImplBase::isTruncateFree
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:370
llvm::BasicTTIImplBase::isAlwaysUniform
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:266
llvm::TargetTransformInfo::MemIndexedMode
MemIndexedMode
The type of load/store indexing.
Definition: TargetTransformInfo.h:1334
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
llvm::ISD::BR_JT
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:984
ceil
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g ceil
Definition: README-FPStack.txt:54
llvm::ElementCount
Definition: TypeSize.h:390
llvm::BasicTTIImplBase::isLegalICmpImmediate
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:300
llvm::ISD::FMINNUM
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:936
llvm::BasicTTIImplBase::isNoopAddrSpaceCast
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
Definition: BasicTTIImpl.h:278
llvm::MCSubtargetInfo::getSchedModel
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
Definition: MCSubtargetInfo.h:163
llvm::Function
Definition: Function.h:60
llvm::Loop
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:530
llvm::ISD::PRE_DEC
@ PRE_DEC
Definition: ISDOpcodes.h:1353
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
llvm::ISD::BSWAP
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:700
llvm::ISD::UDIV
@ UDIV
Definition: ISDOpcodes.h:243
llvm::BasicTTIImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1086
llvm::BasicTTIImplBase::isTypeLegal
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:380
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:727
minimum
Should compile r2 movcc movcs str strb mov lr r1 movcs movcc mov lr r1 str mov mov cmp r1 movlo r2 str bx lr r0 mov mov cmp r0 movhs r2 mov r1 bx lr Some of the NEON intrinsics may be appropriate for more general either as target independent intrinsics or perhaps elsewhere in the ARM backend Some of them may also be lowered to target independent and perhaps some new SDNodes could be added For minimum
Definition: README.txt:489
C1
instcombine should handle this C2 when C1
Definition: README.txt:263
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:309
llvm::CodeModel::Medium
@ Medium
Definition: CodeGen.h:28
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1185
llvm::TargetTransformInfo::PeelingPreferences::AllowPeeling
bool AllowPeeling
Allow peeling off loop iterations.
Definition: TargetTransformInfo.h:543
llvm::TargetTransformInfoImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) const
Definition: TargetTransformInfoImpl.h:213
llvm::BasicTTIImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II)
Definition: BasicTTIImpl.h:611
ErrorHandling.h
llvm::CmpInst::makeCmpResultType
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1044
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:167
llvm::Loop::getStartLoc
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
Definition: LoopInfo.cpp:630
llvm::IntrinsicCostAttributes::getReturnType
Type * getReturnType() const
Definition: TargetTransformInfo.h:149
llvm::BasicTTIImplBase::BasicTTIImplBase
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:243
llvm::BasicTTIImplBase::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:252
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:449
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
llvm::IntrinsicCostAttributes::getInst
const IntrinsicInst * getInst() const
Definition: TargetTransformInfo.h:148
llvm::BasicTTIImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr)
Definition: BasicTTIImpl.h:777
llvm::ISD::FLOG2
@ FLOG2
Definition: ISDOpcodes.h:913
OptimizationRemarkEmitter.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
llvm::Triple
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
llvm::Type::isFPOrFPVectorTy
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:179
llvm::TargetTransformInfo::TCK_CodeSize
@ TCK_CodeSize
Instruction code size.
Definition: TargetTransformInfo.h:213
llvm::TargetLoweringBase::isLegalICmpImmediate
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: TargetLowering.h:2505
llvm::APIntOps::ScaleBitMask
APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth)
Splat/Merge neighboring bits to widen/narrow the bitmask represented by.
Definition: APInt.cpp:2989
APInt.h
llvm::BasicTTIImplBase::isFCmpOrdCheaperThanFCmpZero
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:494
llvm::BasicTTIImplBase::~BasicTTIImplBase
virtual ~BasicTTIImplBase()=default
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:746
llvm::TargetTransformInfo::UnrollingPreferences::Partial
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
Definition: TargetTransformInfo.h:483
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::TargetLoweringBase::getTruncStoreAction
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
Definition: TargetLowering.h:1298
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1423
llvm::TargetLoweringBase::isIndexedLoadLegal
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
Definition: TargetLowering.h:1337
llvm::BasicTTIImplBase::isLSRCostLess
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:347
llvm::TargetTransformInfo::PeelingPreferences
Definition: TargetTransformInfo.h:537
llvm::TargetLoweringBase::isProfitableToHoist
virtual bool isProfitableToHoist(Instruction *I) const
Definition: TargetLowering.h:2642
llvm::BasicTTIImplBase::improveShuffleKindFromMask
TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, ArrayRef< int > Mask) const
Definition: BasicTTIImpl.h:859
llvm::TargetLoweringBase::isSuitableForJumpTable
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
Definition: TargetLoweringBase.cpp:1583
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::BasicTTIImplBase::getInstructionLatency
InstructionCost getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:635
llvm::Optional
Definition: APInt.h:33
llvm::BasicTTIImplBase::isIndexedStoreLegal
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:341
T
#define T
Definition: Mips16ISelLowering.cpp:341
llvm::BasicTTIImplBase::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Definition: BasicTTIImpl.h:667
llvm::SmallPtrSet< const BasicBlock *, 4 >
llvm::TargetTransformInfo::OP_PowerOf2
@ OP_PowerOf2
Definition: TargetTransformInfo.h:898
llvm::ore::NV
DiagnosticInfoOptimizationBase::Argument NV
Definition: OptimizationRemarkEmitter.h:136
llvm::TargetLoweringBase::getTypeLegalizationCost
std::pair< InstructionCost, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
Definition: TargetLoweringBase.cpp:1808
Operator.h
llvm::VectorType::getElementType
Type * getElementType() const
Definition: DerivedTypes.h:422
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1384
llvm::TargetTransformInfoImplCRTPBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: TargetTransformInfoImpl.h:907
llvm::StructType::create
static StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Definition: Type.cpp:513
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::ShuffleVectorInst::isReverseMask
static bool isReverseMask(ArrayRef< int > Mask)
Return true if this shuffle mask swaps the order of elements from exactly one source vector.
Definition: Instructions.cpp:2216
llvm::BasicTTIImplBase::getRegisterBitWidth
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
Definition: BasicTTIImpl.h:688
llvm::TargetLoweringBase::LegalizeAction
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
Definition: TargetLowering.h:195
llvm::FastMathFlags
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:21
llvm::TargetLoweringBase::isCheapToSpeculateCtlz
virtual bool isCheapToSpeculateCtlz() const
Return true if it is cheap to speculate a call to intrinsic ctlz.
Definition: TargetLowering.h:625
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::TargetTransformInfo::SK_PermuteSingleSrc
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
Definition: TargetTransformInfo.h:882
llvm::Type::getInt8Ty
static IntegerType * getInt8Ty(LLVMContext &C)
Definition: Type.cpp:237
llvm::LinearPolySize::isScalable
bool isScalable() const
Returns whether the size is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:298
llvm::BasicTTIImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR()
Definition: BasicTTIImpl.h:351
llvm::APInt::getZero
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:177
llvm::APIntOps::umin
const APInt & umin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be unsigned.
Definition: APInt.h:2149
llvm::TargetTransformInfo::CacheLevel
CacheLevel
The possible cache levels.
Definition: TargetTransformInfo.h:974
llvm::ISD::FABS
@ FABS
Definition: ISDOpcodes.h:905
llvm::BasicTTIImplBase::isSourceOfDivergence
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:264
llvm::IntrinsicCostAttributes::getScalarizationCost
InstructionCost getScalarizationCost() const
Definition: TargetTransformInfo.h:151
llvm::TargetTransformInfo::SK_Broadcast
@ SK_Broadcast
Broadcast element 0 to all other elements.
Definition: TargetTransformInfo.h:872
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::LinearPolySize< TypeSize >::isKnownLT
static bool isKnownLT(const LinearPolySize &LHS, const LinearPolySize &RHS)
Definition: TypeSize.h:328
TargetTransformInfoImpl.h
llvm::BasicTTIImplBase::getTreeReductionCost
InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate arithmetic and shuffle op costs for reduction intrinsics.
Definition: BasicTTIImpl.h:2132
llvm::Triple::isArch64Bit
bool isArch64Bit() const
Test whether the architecture is 64-bit.
Definition: Triple.cpp:1432
llvm::IntrinsicCostAttributes::skipScalarizationCost
bool skipScalarizationCost() const
Definition: TargetTransformInfo.h:159
llvm::BasicTTIImplBase::rewriteIntrinsicWithAddressSpace
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Definition: BasicTTIImpl.h:291
llvm::TargetTransformInfo::requiresOrderedReduction
static bool requiresOrderedReduction(Optional< FastMathFlags > FMF)
A helper function to determine the type of reduction algorithm used for a given Opcode and set of Fas...
Definition: TargetTransformInfo.h:1206
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::IntrinsicCostAttributes::getFlags
FastMathFlags getFlags() const
Definition: TargetTransformInfo.h:150
floor
We have fiadd patterns now but the followings have the same cost and complexity We need a way to specify the later is more profitable def def The FP stackifier should handle simple permutates to reduce number of shuffle e g floor
Definition: README-FPStack.txt:54
MachineValueType.h
llvm::ISD::BRIND
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:980
llvm::AArch64CC::LT
@ LT
Definition: AArch64BaseInfo.h:266
Context
LLVMContext & Context
Definition: NVVMIntrRange.cpp:66
llvm::BasicTTIImplBase::getStoreMinimumVF
unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy, Type *ScalarValTy) const
Definition: BasicTTIImpl.h:315
llvm::ElementCount::isScalar
bool isScalar() const
Counting predicates.
Definition: TypeSize.h:400
llvm::TargetTransformInfoImplBase::getDataLayout
const DataLayout & getDataLayout() const
Definition: TargetTransformInfoImpl.h:46
llvm::BasicTTIImplBase::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Definition: BasicTTIImpl.h:663
llvm::DataLayout::getIndexSizeInBits
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:422
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:923
llvm::TargetLoweringBase::isLegalAddImmediate
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
Definition: TargetLowering.h:2512
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Instruction.h
llvm::TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR
bool isNumRegsMajorCostOfLSR() const
Definition: TargetTransformInfoImpl.h:220
CommandLine.h
llvm::FixedVectorType::getNumElements
unsigned getNumElements() const
Definition: DerivedTypes.h:568
TargetLowering.h
llvm::IntrinsicCostAttributes::getArgTypes
const SmallVectorImpl< Type * > & getArgTypes() const
Definition: TargetTransformInfo.h:153
llvm::MCSubtargetInfo::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Return the maximum prefetch distance in terms of loop iterations.
Definition: MCSubtargetInfo.cpp:355
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1366
llvm::TargetTransformInfo::SK_PermuteTwoSrc
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
Definition: TargetTransformInfo.h:880
llvm::TargetTransformInfo::PeelingPreferences::PeelProfiledIterations
bool PeelProfiledIterations
Allow peeling basing on profile.
Definition: TargetTransformInfo.h:550
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Definition: BlockFrequencyInfo.h:37
llvm::TargetLoweringBase::isOperationLegalOrCustom
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1156
llvm::MCSubtargetInfo::getPrefetchDistance
virtual unsigned getPrefetchDistance() const
Return the preferred prefetch distance in terms of instructions.
Definition: MCSubtargetInfo.cpp:351
llvm::BasicTTIImplBase::getUnrollingPreferences
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP, OptimizationRemarkEmitter *ORE)
Definition: BasicTTIImpl.h:513
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1292
TargetMachine.h
llvm::ISD::CTLZ
@ CTLZ
Definition: ISDOpcodes.h:702
llvm::TargetTransformInfoImplBase
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:33
llvm::BasicTTIImplBase::adjustInliningThreshold
unsigned adjustInliningThreshold(const CallBase *CB)
Definition: BasicTTIImpl.h:509
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:713
llvm::TargetTransformInfoImplBase::getCacheAssociativity
llvm::Optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
Definition: TargetTransformInfoImpl.h:449
Constants.h
llvm::BasicTTIImplBase::getVectorInstrCost
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:1139
llvm::TargetTransformInfo::OP_None
@ OP_None
Definition: TargetTransformInfo.h:898
llvm::TargetTransformInfo::ShuffleKind
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Definition: TargetTransformInfo.h:871
llvm::BasicTTIImplBase::useGPUDivergenceAnalysis
bool useGPUDivergenceAnalysis()
Definition: BasicTTIImpl.h:262
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::Triple::isOSDarwin
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, or DriverKit).
Definition: Triple.h:500
llvm::BasicTTIImplBase::enableWritePrefetching
virtual bool enableWritePrefetching() const
Definition: BasicTTIImpl.h:679
llvm::TargetTransformInfo::CastContextHint
CastContextHint
Represents a hint about the context in which a cast is used.
Definition: TargetTransformInfo.h:1091
llvm::BasicTTIImplBase::getAddressComputationCost
InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
Definition: BasicTTIImpl.h:2106
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
llvm::TargetLoweringBase::AddrMode::HasBaseReg
bool HasBaseReg
Definition: TargetLowering.h:2467
llvm::BitVector::count
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:155
round
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:56
llvm::BasicTTIImplBase::getFlatAddressSpace
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:268
InstrTypes.h
llvm::BasicTTIImplBase::getReplicationShuffleCost
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1147
Y
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
llvm::BasicTTIImplBase::getArithmeticReductionCost
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, Optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2219
llvm::ISD::FROUND
@ FROUND
Definition: ISDOpcodes.h:921
llvm::APInt::getAllOnes
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
llvm::LoopBase::blocks
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:178
llvm::BasicTTIImplBase::getPredicatedAddrSpace
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:287
llvm::BasicTTIImplBase::getExtractWithExtendCost
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:1077
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
llvm::BasicTTIImplBase::getCacheSize
virtual Optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
Definition: BasicTTIImpl.h:643
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:623
llvm::TargetLoweringBase::isCheapToSpeculateCttz
virtual bool isCheapToSpeculateCttz() const
Return true if it is cheap to speculate a call to intrinsic cttz.
Definition: TargetLowering.h:620
llvm::BasicTTIImplBase::getMaxInterleaveFactor
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:775
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:729
llvm::Instruction
Definition: Instruction.h:42
llvm::TargetTransformInfo::MIM_PreDec
@ MIM_PreDec
Pre-decrementing.
Definition: TargetTransformInfo.h:1337
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::TargetLoweringBase::isLegalAddressingMode
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
Definition: TargetLoweringBase.cpp:1895
llvm::TargetMachine::getAssumedAddrSpace
virtual unsigned getAssumedAddrSpace(const Value *V) const
If the specified generic pointer could be assumed as a pointer to a specific address space,...
Definition: TargetMachine.h:320
llvm::BasicTTIImplBase::getInlinerVectorBonusPercent
int getInlinerVectorBonusPercent()
Definition: BasicTTIImpl.h:511
llvm::ISD::FNEARBYINT
@ FNEARBYINT
Definition: ISDOpcodes.h:920
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:919
llvm::TargetTransformInfoImplBase::getCFInstrCost
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
Definition: TargetTransformInfoImpl.h:545
llvm::BasicTTIImplBase::getCacheLineSize
virtual unsigned getCacheLineSize() const
Definition: BasicTTIImpl.h:659
BitVector.h
llvm::TargetTransformInfoImplCRTPBase
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
Definition: TargetTransformInfoImpl.h:897
SmallPtrSet.h
llvm::BitVector
Definition: BitVector.h:75
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1353
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::Triple::getArch
ArchType getArch() const
Get the parsed architecture type of this triple.
Definition: Triple.h:337
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::BasicTTIImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1091
llvm::PartialUnrollingThreshold
cl::opt< unsigned > PartialUnrollingThreshold
llvm::BasicTTIImplBase::getEstimatedNumberOfCaseClusters
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
Definition: BasicTTIImpl.h:397
llvm::BasicTTIImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:921
llvm::None
const NoneType None
Definition: None.h:24
llvm::LinearPolySize< TypeSize >::getFixed
static TypeSize getFixed(ScalarTy MinVal)
Definition: TypeSize.h:283
llvm::lltok::Kind
Kind
Definition: LLToken.h:18
llvm::TargetLoweringBase::getTypeToTransformTo
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:979
Type.h
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:117
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::BasicTTIImplBase::shouldBuildLookupTables
bool shouldBuildLookupTables()
Definition: BasicTTIImpl.h:456
llvm::maxnum
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1306
LoopInfo.h
llvm::TargetTransformInfoImplBase::emitGetActiveLaneMask
bool emitGetActiveLaneMask() const
Definition: TargetTransformInfoImpl.h:170
llvm::TargetTransformInfoImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I) const
Definition: TargetTransformInfoImpl.h:222
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:39
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:74
llvm::ISD::POST_INC
@ POST_INC
Definition: ISDOpcodes.h:1353
llvm::TargetTransformInfoImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
Definition: TargetTransformInfoImpl.h:186
llvm::ISD::FPOW
@ FPOW
Definition: ISDOpcodes.h:911
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
llvm::TargetTransformInfo::SK_Reverse
@ SK_Reverse
Reverse the order of the vector.
Definition: TargetTransformInfo.h:873
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
llvm::TargetTransformInfo::CastContextHint::Normal
@ Normal
The cast is used with a normal load/store.
llvm::ISD::FMINIMUM
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:949
BasicBlock.h
llvm::APInt::slt
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1080
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:75
llvm::BasicTTIImplBase::isLegalAddressingMode
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:304
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::divideCeil
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:769
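A worked example of the rounding behavior (values chosen purely for illustration):

  #include "llvm/Support/MathExtras.h"

  // 10/4 = 2.5 rounds up to 3; exact divisions are returned unchanged.
  uint64_t RoundedUp = llvm::divideCeil(10, 4); // == 3
  uint64_t Exact     = llvm::divideCeil(8, 4);  // == 2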
llvm::TargetTransformInfo::SK_InsertSubvector
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
Definition: TargetTransformInfo.h:878
llvm::TargetMachine::isNoopAddrSpaceCast
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Definition: TargetMachine.h:307
llvm::BasicTTIImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed)
Definition: BasicTTIImpl.h:616
llvm::ISD::FLOG10
@ FLOG10
Definition: ISDOpcodes.h:914
llvm::BasicTTIImplBase::haveFastSqrt
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:487
llvm::TargetTransformInfo::MIM_PostInc
@ MIM_PostInc
Post-incrementing.
Definition: TargetTransformInfo.h:1338
llvm::BasicTTIImplBase::simplifyDemandedVectorEltsIntrinsic
Optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp)
Definition: BasicTTIImpl.h:625
uint64_t
llvm::Type::getWithNewBitWidth
Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitWidth, whilst keeping the old numb...
Definition: DerivedTypes.h:722
llvm::TargetTransformInfo::LSRCost
Definition: TargetTransformInfo.h:416
llvm::TargetLoweringBase::isLoadExtLegal
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Definition: TargetLowering.h:1284
llvm::TargetLoweringBase::isTypeLegal
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Definition: TargetLowering.h:932
llvm::TargetLoweringBase::getLoadExtAction
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Definition: TargetLowering.h:1272
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
llvm::BasicTTIImplBase::getVScaleForTuning
Optional< unsigned > getVScaleForTuning() const
Definition: BasicTTIImpl.h:693
llvm::TargetTransformInfo::OK_UniformConstantValue
@ OK_UniformConstantValue
Definition: TargetTransformInfo.h:893
llvm::TargetLoweringBase::isSuitableForBitTests
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
Definition: TargetLowering.h:1234
llvm::BasicTTIImplBase::isIndexedLoadLegal
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:335
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::TargetLoweringBase::isOperationLegalOrPromote
bool isOperationLegalOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal using promotion.
Definition: TargetLowering.h:1170
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::TargetTransformInfoImplBase::simplifyDemandedUseBitsIntrinsic
Optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
Definition: TargetTransformInfoImpl.h:180
llvm::TargetTransformInfo::UnrollingPreferences
Parameters that control the generic loop unrolling transformation.
Definition: TargetTransformInfo.h:430
llvm::EVT::getEVT
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:561
I
#define I(x, y, z)
Definition: MD5.cpp:58
getCalledFunction
static const Function * getCalledFunction(const Value *V, bool &IsNoBuiltin)
Definition: MemoryBuiltins.cpp:160
llvm::TargetTransformInfoImplBase::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:501
llvm::TargetTransformInfo::OperandValueProperties
OperandValueProperties
Additional properties of an operand's values.
Definition: TargetTransformInfo.h:898
llvm::BasicTTIImplBase::getGEPCost
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:391
llvm::LoopAccessInfo
Drive the analysis of memory accesses in the loop.
Definition: LoopAccessAnalysis.h:558
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:492
llvm::BasicTTIImplBase
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:77
ArrayRef.h
llvm::TargetTransformInfo::PeelingPreferences::AllowLoopNestsPeeling
bool AllowLoopNestsPeeling
Allow peeling off loop iterations for loop nests.
Definition: TargetTransformInfo.h:545
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1384
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::EVT::getIntegerVT
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
llvm::TargetLoweringBase::allowsMisalignedMemoryAccesses
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetLowering.h:1687
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::OptimizationRemarkEmitter::emit
void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
Definition: OptimizationRemarkEmitter.cpp:77
llvm::BasicTTIImplBase::getMaxVScale
Optional< unsigned > getMaxVScale() const
Definition: BasicTTIImpl.h:692
llvm::TargetTransformInfoImplBase::isLoweredToCall
bool isLoweredToCall(const Function *F) const
Definition: TargetTransformInfoImpl.h:121
llvm::CmpInst::BAD_ICMP_PREDICATE
@ BAD_ICMP_PREDICATE
Definition: InstrTypes.h:752
llvm::Type::isVoidTy
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
llvm::BasicTTIImplBase::getInterleavedMemoryOpCost
InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond=false, bool UseMaskForGaps=false)
Definition: BasicTTIImpl.h:1240
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::TargetTransformInfo::SK_Splice
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
Definition: TargetTransformInfo.h:884
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:806
llvm::BasicTTIImpl::BasicTTIImpl
BasicTTIImpl(const TargetMachine *TM, const Function &F)
Definition: BasicTargetTransformInfo.cpp:32
llvm::ISD::POST_DEC
@ POST_DEC
Definition: ISDOpcodes.h:1353
llvm::TargetTransformInfo::OperandValueKind
OperandValueKind
Additional information about an operand's possible values.
Definition: TargetTransformInfo.h:890
llvm::ElementCount::isVector
bool isVector() const
One or more elements.
Definition: TypeSize.h:402
llvm::TargetSubtargetInfo::useAA
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine,...
Definition: TargetSubtargetInfo.cpp:56
llvm::MCSubtargetInfo::getMinPrefetchStride
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
Return the minimum stride necessary to trigger software prefetching.
Definition: MCSubtargetInfo.cpp:363
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::BasicTTIImplBase::getMaxPrefetchIterationsAhead
virtual unsigned getMaxPrefetchIterationsAhead() const
Definition: BasicTTIImpl.h:675
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2139
llvm::TargetTransformInfo::PeelingPreferences::PeelCount
unsigned PeelCount
A forced peeling factor (the number of bodies of the original loop that should be peeled off before t...
Definition: TargetTransformInfo.h:541
llvm::ISD::FMAXIMUM
@ FMAXIMUM
Definition: ISDOpcodes.h:950
llvm::BasicTTIImplBase::emitGetActiveLaneMask
bool emitGetActiveLaneMask()
Definition: BasicTTIImpl.h:607
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::LoopInfo
Definition: LoopInfo.h:1086
llvm::ISD::PRE_INC
@ PRE_INC
Definition: ISDOpcodes.h:1353
llvm::OptimizationRemarkEmitter
The optimization diagnostic interface.
Definition: OptimizationRemarkEmitter.h:33
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1612
DataLayout.h
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
llvm::TargetMachine::getPredicatedAddrSpace
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
If the specified predicate checks whether a generic pointer falls within a specified address space,...
Definition: TargetMachine.h:330
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:698
llvm::CmpInst::ICMP_ULT
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:744
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::ISD::SREM
@ SREM
Definition: ISDOpcodes.h:244
llvm::BasicTTIImplBase::getRegUsageForType
InstructionCost getRegUsageForType(Type *Ty)
Definition: BasicTTIImpl.h:385
llvm::TargetTransformInfoImplCRTPBase::getInstructionLatency
InstructionCost getInstructionLatency(const Instruction *I)
Definition: TargetTransformInfoImpl.h:1229
llvm::TargetLoweringBase::AddrMode::BaseGV
GlobalValue * BaseGV
Definition: TargetLowering.h:2465
CostKind
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
TargetSubtargetInfo.h
llvm::ISD::FEXP
@ FEXP
Definition: ISDOpcodes.h:915
llvm::PICLevel::Level
Level
Definition: CodeGen.h:33
llvm::TargetLoweringBase::InstructionOpcodeToISD
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
Definition: TargetLoweringBase.cpp:1728
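A hedged sketch of how this mapping is typically consulted (TLI is an assumed in-scope pointer to a TargetLoweringBase; nothing here is quoted from this file): an IR opcode is translated to its SelectionDAG opcode before legality or cost is queried.

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/IR/Instruction.h"

  // The IR 'add' opcode corresponds to ISD::ADD in the SelectionDAG.
  int ISDOpcode = TLI->InstructionOpcodeToISD(llvm::Instruction::Add);
  bool IsAdd = (ISDOpcode == llvm::ISD::ADD);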
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:224
llvm::InstructionCost::isValid
bool isValid() const
Definition: InstructionCost.h:79
llvm::TargetTransformInfoImplBase::getCmpSelInstrCost
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
Definition: TargetTransformInfoImpl.h:554
llvm::ISD::FEXP2
@ FEXP2
Definition: ISDOpcodes.h:916
llvm::BasicTTIImplBase::getPeelingPreferences
void getPeelingPreferences(Loop *L, ScalarEvolution &SE, TTI::PeelingPreferences &PP)
Definition: BasicTTIImpl.h:585
llvm::TargetTransformInfo::MIM_Unindexed
@ MIM_Unindexed
No indexing.
Definition: TargetTransformInfo.h:1335
llvm::VectorType::getHalfElementsVectorType
static VectorType * getHalfElementsVectorType(VectorType *VTy)
This static method returns a VectorType with half as many elements as the input type and the same ele...
Definition: DerivedTypes.h:493
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:429
llvm::TargetTransformInfo::OK_AnyValue
@ OK_AnyValue
Definition: TargetTransformInfo.h:891
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, bool Extract)
Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
Definition: BasicTTIImpl.h:723
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:280
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::TargetTransformInfo::SK_Transpose
@ SK_Transpose
Transpose two vectors.
Definition: TargetTransformInfo.h:877
llvm::TargetSubtargetInfo
TargetSubtargetInfo - Generic base class for all target subtargets.
Definition: TargetSubtargetInfo.h:60
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1384
llvm::BasicTTIImplBase::useAA
bool useAA() const
Definition: BasicTTIImpl.h:378
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
llvm::SmallPtrSetImplBase::size
size_type size() const
Definition: SmallPtrSet.h:93
llvm::TargetTransformInfo::MIM_PreInc
@ MIM_PreInc
Pre-incrementing.
Definition: TargetTransformInfo.h:1336
llvm::TargetLoweringBase::isFreeAddrSpaceCast
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
Definition: TargetLoweringBase.cpp:908
llvm::TargetTransformInfoImplBase::getArithmeticInstrCost
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueKind Opd1Info, TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, TTI::OperandValueProperties Opd2PropInfo, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
Definition: TargetTransformInfoImpl.h:471
llvm::ISD::FSQRT
@ FSQRT
Definition: ISDOpcodes.h:906
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
llvm::TargetLoweringBase::getTargetMachine
const TargetMachine & getTargetMachine() const
Definition: TargetLowering.h:345
llvm::BasicTTIImplBase::isHardwareLoopProfitable
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo)
Definition: BasicTTIImpl.h:593
llvm::APIntOps::umax
const APInt & umax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be unsigned.
Definition: APInt.h:2154
Constant.h
llvm::TargetLoweringBase::AddrMode::BaseOffs
int64_t BaseOffs
Definition: TargetLowering.h:2466
llvm::minnum
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1295
llvm::TargetLoweringBase::isFAbsFree
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
Definition: TargetLowering.h:2847
llvm::ISD::STRICT_FMA
@ STRICT_FMA
Definition: ISDOpcodes.h:405
llvm::ISD::FMAXNUM
@ FMAXNUM
Definition: ISDOpcodes.h:937
llvm::BasicTTIImpl
Concrete BasicTTIImpl that can be used if no further customization is needed.
Definition: BasicTTIImpl.h:2315
llvm::KnownBits
Definition: KnownBits.h:23
llvm::BasicTTIImplBase::getCallInstrCost
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind)
Compute a cost of the given call instruction.
Definition: BasicTTIImpl.h:2094
llvm::TargetLoweringBase::AddrMode::Scale
int64_t Scale
Definition: TargetLowering.h:2468
llvm::TargetLoweringBase::isIndexedStoreLegal
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
Definition: TargetLowering.h:1351
llvm::TargetTransformInfo::UnrollingPreferences::UpperBound
bool UpperBound
Allow using trip count upper bound to unroll loops.
Definition: TargetTransformInfo.h:497
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:909
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:917
llvm::ShuffleVectorInst::isSelectMask
static bool isSelectMask(ArrayRef< int > Mask)
Return true if this shuffle mask chooses elements from its source vectors without lane crossings.
Definition: Instructions.cpp:2246
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:197
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:908
ISDOpcodes.h
llvm::TypeSize
Definition: TypeSize.h:421
llvm::MCSchedModel::DefaultLoadLatency
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:286
Casting.h
llvm::BasicTTIImplBase::getMaskedMemoryOpCost
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:1224
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::TargetLoweringBase::isOperationExpand
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
Definition: TargetLowering.h:1259
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::LoopBase::getHeader
BlockT * getHeader() const
Definition: LoopInfo.h:104
llvm::BasicTTIImplBase::isProfitableToHoist
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:374
llvm::BasicTTIImplBase::isLegalAddImmediate
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:296
llvm::BasicTTIImplBase::getScalarizationOverhead
InstructionCost getScalarizationOverhead(VectorType *RetTy, ArrayRef< const Value * > Args, ArrayRef< Type * > Tys)
Estimate the overhead of scalarizing the inputs and outputs of an instruction, with return type RetTy...
Definition: BasicTTIImpl.h:761
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:222
llvm::Function::isTargetIntrinsic
bool isTargetIntrinsic() const
isTargetIntrinsic - Returns true if this function is an intrinsic and the intrinsic is specific to a ...
Definition: Function.cpp:744
llvm::ISD::SDIV
@ SDIV
Definition: ISDOpcodes.h:242
llvm::log2
static double log2(double V)
Definition: AMDGPULibCalls.cpp:802
llvm::TargetLoweringBase::getScalingFactorCost
virtual InstructionCost getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target,...
Definition: TargetLowering.h:2493
llvm::BasicTTIImplBase::collectFlatAddressOperands
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Definition: BasicTTIImpl.h:273
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:960
llvm::InstCombiner
The core instruction combiner logic.
Definition: InstCombiner.h:45
llvm::TargetTransformInfoImplBase::instCombineIntrinsic
Optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
Definition: TargetTransformInfoImpl.h:174
llvm::TargetLoweringBase::isOperationLegalOrCustomOrPromote
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Definition: TargetLowering.h:1184
llvm::TargetTransformInfoImplBase::DL
const DataLayout & DL
Definition: TargetTransformInfoImpl.h:37
llvm::IntrinsicInst
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:46
llvm::HardwareLoopInfo
Attributes of a target-dependent hardware loop.
Definition: TargetTransformInfo.h:94
llvm::InstructionCost::getInvalid
static InstructionCost getInvalid(CostType Val=0)
Definition: InstructionCost.h:73
llvm::BasicTTIImplBase::getMinMaxReductionCost
InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, TTI::TargetCostKind CostKind)
Try to calculate op costs for min/max reduction operations.
Definition: BasicTTIImpl.h:2229
llvm::CodeModel::Large
@ Large
Definition: CodeGen.h:28
llvm::TargetLoweringBase::isTruncateFree
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
Definition: TargetLowering.h:2622
llvm::OptimizationRemark
Diagnostic information for applied optimization remarks.
Definition: DiagnosticInfo.h:690
llvm::BasicTTIImplBase::getGatherScatterOpCost
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1231
Instructions.h
llvm::IntrinsicCostAttributes::getID
Intrinsic::ID getID() const
Definition: TargetTransformInfo.h:147
llvm::TargetLoweringBase::areJTsAllowed
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
Definition: TargetLowering.h:1202
SmallVector.h
llvm::TargetTransformInfo::RegisterKind
RegisterKind
Definition: TargetTransformInfo.h:919
llvm::ISD::UREM
@ UREM
Definition: ISDOpcodes.h:245
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::CmpInst::ICMP_UGT
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:742
N
#define N
llvm::BasicTTIImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:1380
llvm::ISD::BITREVERSE
@ BITREVERSE
Definition: ISDOpcodes.h:704
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::ISD::CTTZ
@ CTTZ
Definition: ISDOpcodes.h:701
llvm::BasicTTIImplBase::getNumberOfParts
unsigned getNumberOfParts(Type *Tp)
Definition: BasicTTIImpl.h:2100
TargetTransformInfo.h
llvm::BasicTTIImplBase::getMemoryOpCost
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:1180
llvm::TargetLoweringBase::AddrMode
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
Definition: TargetLowering.h:2464
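A sketch of the decomposition described above, assuming in-scope TLI (a TargetLoweringBase pointer), DL (a DataLayout), and Ty (the accessed type), all illustrative: encode the address form base + 8 + 2*index and ask the target whether it folds into a single addressing mode.

  #include "llvm/CodeGen/TargetLowering.h"

  llvm::TargetLoweringBase::AddrMode AM;
  AM.BaseGV = nullptr;   // no global symbol as the base
  AM.BaseOffs = 8;       // constant displacement
  AM.HasBaseReg = true;  // a base register is present
  AM.Scale = 2;          // index register scaled by 2
  bool Folds = TLI->isLegalAddressingMode(DL, AM, Ty, /*AddrSpace=*/0);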
llvm::TargetLoweringBase::isZExtFree
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
Definition: TargetLowering.h:2711
llvm::TargetTransformInfo::MIM_PostDec
@ MIM_PostDec
Post-decrementing.
Definition: TargetTransformInfo.h:1339
llvm::SmallVectorImpl< int >
llvm::BasicTTIImplBase::getAssumedAddrSpace
unsigned getAssumedAddrSpace(const Value *V) const
Definition: BasicTTIImpl.h:282
llvm::CallBase
Base class for all callable instructions (InvokeInst and CallInst). Holds everything related to callin...
Definition: InstrTypes.h:1174
llvm::APInt::sgt
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1151
DerivedTypes.h
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
llvm::TargetTransformInfo::UnrollingPreferences::OptSizeThreshold
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable).
Definition: TargetTransformInfo.h:452
llvm::BasicTTIImplBase::preferPredicateOverEpilogue
bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *TLI, DominatorTree *DT, const LoopAccessInfo *LAI)
Definition: BasicTTIImpl.h:600
llvm::BasicTTIImplBase::getTypeBasedIntrinsicInstrCost
InstructionCost getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind)
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:1584
llvm::BasicTTIImplBase::getScalingFactorCost
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:359
llvm::MCSubtargetInfo::getCacheLineSize
virtual Optional< unsigned > getCacheLineSize(unsigned Level) const
Return the target cache line size in bytes at a given level.
Definition: MCSubtargetInfo.cpp:347
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::TargetTransformInfo::getOperandInfo
static OperandValueKind getOperandInfo(const Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
Definition: TargetTransformInfo.cpp:692
llvm::TargetTransformInfo::TCC_Basic
@ TCC_Basic
The cost of a typical 'add' instruction.
Definition: TargetTransformInfo.h:262
llvm::SwitchInst
Multiway switch.
Definition: Instructions.h:3243
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:394
llvm::TargetLoweringBase::getValueType
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
Definition: TargetLowering.h:1455
llvm::IntrinsicCostAttributes::isTypeBasedOnly
bool isTypeBasedOnly() const
Definition: TargetTransformInfo.h:155
llvm::MCSubtargetInfo::getCacheAssociativity
virtual Optional< unsigned > getCacheAssociativity(unsigned Level) const
Return the cache associativity for the given level of cache.
Definition: MCSubtargetInfo.cpp:343
llvm::TargetLoweringBase::TypeSplitVector
@ TypeSplitVector
Definition: TargetLowering.h:212
llvm::TargetTransformInfoImplBase::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfoImpl.h:607
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:918
llvm::TargetTransformInfo::OK_NonUniformConstantValue
@ OK_NonUniformConstantValue
Definition: TargetTransformInfo.h:894
llvm::BasicTTIImplBase::getOrderedReductionCost
InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, TTI::TargetCostKind CostKind)
Try to calculate the cost of performing strict (in-order) reductions, which involves doing a sequence...
Definition: BasicTTIImpl.h:2202
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1281
llvm::ISD::FCANONICALIZE
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition: ISDOpcodes.h:499
llvm::MachineMemOperand::MONone
@ MONone
Definition: MachineMemOperand.h:131
llvm::BasicTTIImplBase::hasBranchDivergence
bool hasBranchDivergence()
Definition: BasicTTIImpl.h:260
llvm::ISD::FLOG
@ FLOG
Definition: ISDOpcodes.h:912
llvm::BasicTTIImplBase::getExtendedAddReductionCost
InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind)
Definition: BasicTTIImpl.h:2287
llvm::Type::getContainedType
Type * getContainedType(unsigned i) const
This method is used to implement the type iterator (defined at the end of the file).
Definition: Type.h:338
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:211
llvm::BasicTTIImplBase::isProfitableLSRChainElement
bool isProfitableLSRChainElement(Instruction *I)
Definition: BasicTTIImpl.h:355
llvm::IntrinsicCostAttributes::getArgs
const SmallVectorImpl< const Value * > & getArgs() const
Definition: TargetTransformInfo.h:152
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:668
llvm::ShuffleVectorInst::isTransposeMask
static bool isTransposeMask(ArrayRef< int > Mask)
Return true if this shuffle mask is a transpose mask.
Definition: Instructions.cpp:2259
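An illustrative classification (mask values chosen by hand): for two 4-lane source vectors, the even-lane transpose mask is <0, 4, 2, 6>.

  #include "llvm/IR/Instructions.h"

  // Lanes 0 and 2 come from the first source, 4 and 6 from the second:
  // the even-element transpose of two <4 x T> inputs.
  int Mask[] = {0, 4, 2, 6};
  bool IsTranspose = llvm::ShuffleVectorInst::isTransposeMask(Mask); // true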
llvm::TargetTransformInfo::SK_ExtractSubvector
@ SK_ExtractSubvector
ExtractSubvector Index indicates start offset.
Definition: TargetTransformInfo.h:879
llvm::Triple::aarch64
@ aarch64
Definition: Triple.h:51
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2144
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:703
llvm::codeview::PublicSymFlags::Function
@ Function
llvm::TargetLoweringBase::getTypeAction
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type: either it is already legal (return 'Legal') or we ...
Definition: TargetLowering.h:966
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37
llvm::BasicTTIImplBase::getShuffleCost
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, ArrayRef< int > Mask, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=None)
Definition: BasicTTIImpl.h:892
llvm::ISD::VSELECT
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:722