//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopIterator.h"
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "tti"

static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));
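// EnableReduxCost is a command-line toggle (-costmodel-reduxcost); when it is
// set, getInstructionThroughput() below tries to match the pairwise and
// vector-splitting reduction patterns before costing the instruction normally.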

namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
}

bool HardwareLoopInfo::canAnalyze(LoopInfo &LI) {
  // If the loop has irreducible control flow, it cannot be converted to a
  // hardware loop.
  LoopBlocksRPO RPOT(L);
  RPOT.perform(&LI);
  if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
    return false;
  return true;
}

bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
                                               LoopInfo &LI, DominatorTree &DT,
                                               bool ForceNestedLoop,
                                               bool ForceHardwareLoopPHI) {
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  for (BasicBlock *BB : ExitingBlocks) {
    // If we pass the updated counter back through a phi, we need to know
    // which latch the updated value will be coming from.
    if (!L->isLoopLatch(BB)) {
      if (ForceHardwareLoopPHI || CounterInReg)
        continue;
    }

    const SCEV *EC = SE.getExitCount(L, BB);
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE.isLoopInvariant(EC, L))
      continue;

    if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
      continue;

    // If this exiting block is contained in a nested loop, it is not eligible
    // for insertion of the branch-and-decrement since the inner loop would
    // end up messing up the value in the CTR.
    if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
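    // For example, an exiting block that sits under a conditional branch and
    // only runs on some iterations does not dominate the blocks feeding the
    // backedge, so it is rejected by the check below.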
    bool NotAlways = false;
    for (BasicBlock *Pred : predecessors(L->getHeader())) {
      if (!L->contains(Pred))
        continue;

      if (!DT.dominates(BB, Pred)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this block ends with a conditional branch.
    Instruction *TI = BB->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      ExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    ExitBlock = BB;
    ExitCount = EC;
    break;
  }

  if (!ExitBlock)
    return false;
  return true;
}

TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

TargetTransformInfo::~TargetTransformInfo() {}

TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}

int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(F, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getInlinerVectorBonusPercent() const {
  return TTIImpl->getInlinerVectorBonusPercent();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
    const User *U) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
                                     ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::collectFlatAddressOperands(
    SmallVectorImpl<int> &OpIndexes, Intrinsic::ID IID) const {
  return TTIImpl->collectFlatAddressOperands(OpIndexes, IID);
}

bool TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
    IntrinsicInst *II, Value *OldV, Value *NewV) const {
  return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

bool TargetTransformInfo::isHardwareLoopProfitable(
    Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
    TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
  return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::canSaveCmp(Loop *L, BranchInst **BI,
                                     ScalarEvolution *SE, LoopInfo *LI,
                                     DominatorTree *DT, AssumptionCache *AC,
                                     TargetLibraryInfo *LibInfo) const {
  return TTIImpl->canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
  return TTIImpl->shouldFavorBackedgeIndex(L);
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType,
                                             MaybeAlign Alignment) const {
  return TTIImpl->isLegalMaskedStore(DataType, Alignment);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType,
                                            MaybeAlign Alignment) const {
  return TTIImpl->isLegalMaskedLoad(DataType, Alignment);
}

bool TargetTransformInfo::isLegalNTStore(Type *DataType,
                                         Align Alignment) const {
  return TTIImpl->isLegalNTStore(DataType, Alignment);
}

bool TargetTransformInfo::isLegalNTLoad(Type *DataType, Align Alignment) const {
  return TTIImpl->isLegalNTLoad(DataType, Alignment);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedCompressStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedExpandLoad(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}

bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}

bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

TargetTransformInfo::MemCmpExpansionOptions
TargetTransformInfo::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(OptSize, IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
  return TTIImpl->getNumberOfRegisters(ClassID);
}

unsigned TargetTransformInfo::getRegisterClassForType(bool Vector,
                                                      Type *Ty) const {
  return TTIImpl->getRegisterClassForType(Vector, Ty);
}

const char *TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
  return TTIImpl->getRegisterClassName(ClassID);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
    const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
    CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}

TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
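  // For example:
  //   %splat = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  // replicates element 0 of %v into every lane, so the result is uniform.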
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non-uniform vector of constants
  // and check if the constant(s) are all powers of two.
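  // E.g. <i32 4, i32 4, i32 4, i32 4> is a uniform constant whose elements are
  // powers of two, while <i32 1, i32 2, i32 4, i32 8> is a non-uniform constant
  // whose elements are still all powers of two.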
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue).
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}

int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                          Type *Src, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                            Type *CondTy, const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert((I == nullptr || I->getOpcode() == Opcode) &&
         "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
    bool UseMaskForGaps) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace,
                                                 UseMaskForCond,
                                                 UseMaskForGaps);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(
    Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
    ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
  int Cost = TTIImpl->getMemcpyCost(I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}

Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) const {
  return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
                                                     Value *Length,
                                                     unsigned SrcAlign,
                                                     unsigned DestAlign) const {
  return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
                                            DestAlign);
}

void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
    SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
    unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
  TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                             SrcAlign, DestAlign);
}

bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
                                              const Function *Callee) const {
  return TTIImpl->areInlineCompatible(Caller, Callee);
}

bool TargetTransformInfo::areFunctionArgsABICompatible(
    const Function *Caller, const Function *Callee,
    SmallPtrSetImpl<Argument *> &Args) const {
  return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
}

bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
                                             Type *Ty) const {
  return TTIImpl->isIndexedLoadLegal(Mode, Ty);
}

bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
                                              Type *Ty) const {
  return TTIImpl->isIndexedStoreLegal(Mode, Ty);
}

unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
  return TTIImpl->getLoadStoreVecRegBitWidth(AS);
}

bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
  return TTIImpl->isLegalToVectorizeLoad(LI);
}

bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
  return TTIImpl->isLegalToVectorizeStore(SI);
}

bool TargetTransformInfo::isLegalToVectorizeLoadChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                              AddrSpace);
}

bool TargetTransformInfo::isLegalToVectorizeStoreChain(
    unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
  return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                               AddrSpace);
}

unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}

unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}

bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}

bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}

unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
  return TTIImpl->getGISelRematGlobalCost();
}

int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}

static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
                                     unsigned Level) {
  // We don't need a shuffle if we just want to have element 0 in position 0 of
  // the vector.
  if (!SI && Level == 0 && IsLeft)
    return true;
  else if (!SI)
    return false;

  SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);

  // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
  // we look at the left or right side.
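  // For example, at Level == 1 on a 4-wide shuffle this produces
  // <0, 2, -1, -1> for the left side and <1, 3, -1, -1> for the right side,
  // where -1 stands for an undef mask element.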
  for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
    Mask[i] = val;

  SmallVector<int, 16> ActualMask = SI->getShuffleMask();
  return Mask == ActualMask;
}

namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           /// Not a reduction.
  RK_Arithmetic,     /// Binary reduction data.
  RK_MinMax,         /// Min/max reduction data.
  RK_UnsignedMinMax, /// Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;
  Value *LHS = nullptr;
  Value *RHS = nullptr;
  ReductionKind Kind = RK_None;
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace

static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
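// For example, the IR pair
//   %c = icmp slt i32 %a, %b
//   %m = select i1 %c, i32 %a, i32 %b
// is recognized above as an RK_MinMax (signed minimum) reduction step.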

static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(I->getOperand(0));
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(I->getOperand(1));
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //   %NextLevelOpL = shufflevector %R, <1, undef ...>
    //   %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next level's binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}

static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  // (X0, X1, X2, X3)
  // (X0 + X1, X2 + X3, undef, undef)
  // ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}

static std::pair<Value *, ShuffleVectorInst *>
getShuffleAndOtherOprd(Value *L, Value *R) {
  ShuffleVectorInst *S = nullptr;

  if ((S = dyn_cast<ShuffleVectorInst>(L)))
    return std::make_pair(R, S);

  S = dyn_cast<ShuffleVectorInst>(R);
  return std::make_pair(L, S);
}

static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following, matching
  // one fadd/shufflevector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check that the current reduction operation and the shuffle use the same
    // value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that the shuffle mask matches.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}

int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::FNeg: {
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = OK_AnyValue;
    Op2VP = OP_None;
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                           SI->getAlignment(),
                           SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                           LI->getAlignment(),
                           LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst *EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(EEI->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by an extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                        /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    return getVectorInstrCost(I->getOpcode(),
                              EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst *IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                              IE->getType(), Idx);
  }
  case Instruction::ExtractValue:
    return 0; // Model all ExtractValue nodes as free.
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                   Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}

TargetTransformInfo::Concept::~Concept() {}

TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}

TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}

TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}

AnalysisKey TargetIRAnalysis::Key;

TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}

// Register the basic pass.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;

void TargetTransformInfoWrapperPass::anchor() {}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}

TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}

ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
uint64_t CallInst * C
Value * getValueOperand()
Definition: Instructions.h:415
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
Return any intrinsic address operand indexes which may be rewritten if they use a flat address space ...
bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:70
static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, unsigned Level)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty)
TargetTransformInfo & operator=(TargetTransformInfo &&RHS)
SI Whole Quad Mode
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:975
This class represents lattice values for constants.
Definition: AllocatorList.h:23
int getCallCost(FunctionType *FTy, int NumArgs=-1, const User *U=nullptr) const
Estimate the cost of a function call when lowered.
bool isLegalAddImmediate(int64_t Imm) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool isFPVectorizationPotentiallyUnsafe() const
Indicate that it is potentially unsafe to automatically vectorize floating-point operations because t...
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
const Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool supportsEfficientVectorElementLoadStore() const
If target has efficient vector element load/store instructions, it can return true here so that inser...
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I, unsigned Level, unsigned NumLevels)
bool useColdCCForColdCall(Function &F) const
Return true if the input function which is cold at all call sites, should use coldcc calling conventi...
The main scalar evolution driver.
TargetTransformInfoWrapperPass()
We must provide a default constructor for the pass but it should never be used.
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands) const
Estimate the cost of a GEP operation when lowered.
MemIndexedMode
The type of load/store indexing.
ImmutablePass * createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA)
Create an analysis pass wrapper around a TTI object.
bool isReverse() const
Return true if this shuffle swaps the order of elements from exactly one source vector.
A cache of @llvm.assume calls within a function.
Analysis pass providing the TargetTransformInfo.
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAlign, unsigned DestAlign) const
This instruction constructs a fixed permutation of two input vectors.
uint64_t getTypeSizeInBits(Type *Ty) const
Return the size in bits of the specified type, for which isSCEVable must return true.
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
int getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys) const
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond=false, bool UseMaskForGaps=false) const
F(f)
An instruction for reading from memory.
Definition: Instructions.h:169
MaxMin_match< FCmpInst, LHS, RHS, ufmax_pred_ty > m_UnordFMax(const LHS &L, const RHS &R)
Match an &#39;unordered&#39; floating point maximum function.
bool canMacroFuseCmp() const
Return true if the target can fuse a compare and branch.
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
Don&#39;t restrict interleaved unrolling to small loops.
llvm::Optional< unsigned > getCacheAssociativity(CacheLevel Level) const
bool isSingleSource() const
Return true if this shuffle chooses elements from exactly one source vector without changing the leng...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
This file provides helpers for the implementation of a TargetTransformInfo-conforming class...
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
Return hardware support for population count.
unsigned getMaxInterleaveFactor(unsigned VF) const
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:47
bool enableMaskedInterleavedAccessVectorization() const
Enable matching of interleaved access groups that contain predicated accesses or gaps and therefore v...
bool isLegalICmpImmediate(int64_t Imm) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
Definition: BitVector.h:937
int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy=nullptr) const
Estimate the cost of a specific operation when lowered.
bool hasBranchDivergence() const
Return true if branch divergence exists.
This class represents the LLVM &#39;select&#39; instruction.
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize) const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
Definition: LoopInfo.h:928
bool shouldBuildLookupTablesForConstant(Constant *C) const
Return true if switches should be turned into lookup tables containing this constant value for the ta...
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal load.
static AnalysisKey * ID()
Returns an opaque, unique ID for this analysis type.
Definition: PassManager.h:406
bool shouldBuildLookupTables() const
Return true if switches should be turned into lookup tables for the target.
MaxMin_match< FCmpInst, LHS, RHS, ufmin_pred_ty > m_UnordFMin(const LHS &L, const RHS &R)
Match an &#39;unordered&#39; floating point minimum function.
mir Rename Register Operands
bool isLegalMaskedScatter(Type *DataType) const
Return true if the target supports masked scatter.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:424
Class to represent function types.
Definition: DerivedTypes.h:108
unsigned getMinVectorRegisterBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:246
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const
Return true if it&#39;s free to truncate a value of type Ty1 to type Ty2.
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
PopcntSupportKind
Flags indicating the kind of support for population count.
bool hasDivRemOp(Type *DataType, bool IsSigned) const
Return true if the target has a unified operation to calculate division and remainder.
MaxMin_match< FCmpInst, LHS, RHS, ofmin_pred_ty > m_OrdFMin(const LHS &L, const RHS &R)
Match an &#39;ordered&#39; floating point minimum function.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
Selects elements from the corresponding lane of either source operand.
int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1) const
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:125
An instruction for storing to memory.
Definition: Instructions.h:325
void initializeTargetTransformInfoWrapperPassPass(PassRegistry &)
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
Return true if the given instruction (assumed to be a memory access instruction) has a volatile varia...
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index=-1) const
bool isSelect() const
Return true if this shuffle chooses elements from its source vectors without lane crossings and all o...
bool isLegalToVectorizeLoad(LoadInst *LI) const
Reverse the order of the vector.
VectorType * getType() const
Overload to return most specific vector type.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:144
int getAddressComputationCost(Type *Ty, ScalarEvolution *SE=nullptr, const SCEV *Ptr=nullptr) const
Value * getOperand(unsigned i) const
Definition: User.h:169
Analysis containing CSE Info
Definition: CSEInfo.cpp:20
int getExtCost(const Instruction *I, const Value *Src) const
Estimate the cost of a EXT operation when lowered.
static bool isExtractSubvectorMask(ArrayRef< int > Mask, int NumSrcElts, int &Index)
Return true if this shuffle mask is an extract subvector mask.
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
Rewrite intrinsic call II such that OldV will be replaced with NewV, which has a different address sp...
ExtractSubvector Index indicates start offset.
static cl::opt< bool > EnableReduxCost("costmodel-reduxcost", cl::init(false), cl::Hidden, cl::desc("Recognize reduction patterns."))
unsigned getMaxPrefetchIterationsAhead() const
bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:432
This instruction inserts a single (scalar) element into a VectorType value.
int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment) const
Returns options for expansion of memcmp. IsZeroCmp is.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:148
Wrapper pass for TargetTransformInfo.
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
bool isLegalToVectorizeStore(StoreInst *SI) const
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:465
unsigned getRegisterBitWidth(bool Vector) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
Flags describing the kind of vector reduction.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
bool isAlwaysUniform(const Value *V) const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
unsigned getNumberOfParts(Type *Tp) const
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This is an important base class in LLVM.
Definition: Constant.h:41
bool isIdentity() const
Return true if this shuffle chooses elements from exactly one source vector without lane crossings an...
static cl::opt< bool > ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false), cl::desc("Force allowance of nested hardware loops"))
AMDGPU Lower Kernel Arguments
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
TargetIRAnalysis()
Default construct a target IR analysis.
bool isLegalMaskedLoad(Type *DataType, MaybeAlign Alignment) const
Return true if the target supports masked load.
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy=nullptr, const Instruction *I=nullptr) const
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
constexpr double e
Definition: MathExtras.h:57
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Merge elements from two source vectors into one with any shuffle mask.
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:73
static cl::opt< bool > ForceHardwareLoopPHI("force-hardware-loop-phi", cl::Hidden, cl::init(false), cl::desc("Force hardware loop counter to be updated through a phi"))
Attributes of a target dependent hardware loop.
const Value * getCondition() const
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, SmallPtrSetImpl< Argument *> &Args) const
static std::pair< Value *, ShuffleVectorInst * > getShuffleAndOtherOprd(Value *L, Value *R)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:40
bool isLegalMaskedCompressStore(Type *DataType) const
Return true if the target supports masked compress store.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:33
unsigned getNumberOfRegisters(unsigned ClassID) const
OperandValueProperties
Additional properties of an operand&#39;s values.
bool isProfitableToHoist(Instruction *I) const
Return true if it is profitable to hoist instruction in the then/else to before if.
ReductionKind
Kind of the reduction data.
int getFPOpCost(Type *Ty) const
Return the expected cost of supporting the floating point operation of the specified type...
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
ImmutablePass class - This class is used to provide information that does not need to be run...
Definition: Pass.h:255
Type * getType() const
Return the LLVM type of this SCEV expression.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, TargetTransformInfo::LSRCost &C2) const
Return true if LSR cost of C1 is lower than C1.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
int getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const
Calculate the cost of performing a vector reduction.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment...
Definition: Alignment.h:117
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:248
Module.h This file contains the declarations for the Module class.
unsigned getCostOfKeepingLiveOverCall(ArrayRef< Type *> Tys) const
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
int getIntImmCost(const APInt &Imm, Type *Ty) const
Return the expected cost of materializing for the given integer immediate of the specified type...
Provides information about what library functions are available for the current target.
bool isLegalMaskedGather(Type *DataType) const
Return true if the target supports masked gather.
AddressSpace
Definition: NVPTXBaseInfo.h:21
int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const
bool changesLength() const
Return true if this shuffle returns a vector with a different number of elements than its source vect...
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:124
bool isSourceOfDivergence(const Value *V) const
Returns whether V is a source of divergence.
bool LSRWithInstrQueries() const
Return true if the loop strength reduce pass should make Instruction* based TTI queries to isLegalAdd...
static ReductionKind matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, unsigned &Opcode, Type *&Ty)
bool canAnalyze(LoopInfo &LI)
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:585
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:566
bool isZeroEltSplat() const
Return true if all elements of this shuffle are the same value as the first element of exactly one so...
Class to represent vector types.
Definition: DerivedTypes.h:432
Class for arbitrary precision integers.
Definition: APInt.h:69
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index=-1) const
TargetTransformInfo(T Impl)
Construct a TTI object using a type implementing the Concept API below.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
Query the target whether it would be profitable to convert the given loop into a hardware loop...
Result run(const Function &F, FunctionAnalysisManager &)
bool shouldExpandReduction(const IntrinsicInst *II) const
int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty) const
Return the expected cost for the given integer when optimising for size.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys, const User *U=nullptr) const
Estimate the cost of an intrinsic when lowered.
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, unsigned Alignment=1, bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isTranspose() const
Return true if this shuffle transposes the elements of its inputs without changing the length of the vectors.
This file provides various utilities for inspecting and working with the control flow graph in LLVM IR.
int getMemcpyCost(const Instruction *I) const
unsigned getGISelRematGlobalCost() const
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
unsigned getAtomicMemIntrinsicMaxElementSize() const
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:242
This class represents an analyzed expression in the program.
int getUserCost(const User *U, ArrayRef< const Value *> Operands) const
Estimate the cost of a given IR user when lowered.
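A hedged sketch of a typical query; TTI and the Instruction *I are assumed to be in scope:
  // Collect I's operand values and estimate the cost of lowering I.
  SmallVector<const Value *, 4> Operands(I->value_op_begin(), I->value_op_end());
  int Cost = TTI.getUserCost(I, Operands);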
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:509
Parameters that control the generic loop unrolling transformation.
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
#define I(x, y, z)
Definition: MD5.cpp:58
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
iterator_range< value_op_iterator > operand_values()
Definition: User.h:261
const char * getRegisterClassName(unsigned ClassID) const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
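A standard usage pattern; V is an assumed Value* in scope:
  // dyn_cast<> yields nullptr when V is not a ConstantInt, so the result can
  // be tested directly.
  if (const auto *CI = dyn_cast<ConstantInt>(V))
    return CI->isZero();
  return false;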
This instruction extracts a single (scalar) element from a VectorType value.
int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index=0, Type *SubTp=nullptr) const
bool isLegalMaskedStore(Type *DataType, MaybeAlign Alignment) const
Return true if the target supports masked store.
int getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info=OK_AnyValue, OperandValueKind Opd2Info=OK_AnyValue, OperandValueProperties Opd1PropInfo=OP_None, OperandValueProperties Opd2PropInfo=OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>()) const
This is an approximation of reciprocal throughput of a math/logic op.
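An illustrative (not authoritative) query for the reciprocal-throughput cost of a vector add; TTI and an LLVMContext Ctx are assumed to be in scope:
  // Cost of adding two <4 x i32> vectors, using the default operand info.
  Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
  int AddCost = TTI.getArithmeticInstrCost(Instruction::Add, VecTy);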
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:368
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse post-order traversal of a loop.
Definition: LoopIterator.h:172
bool isLegalMaskedExpandLoad(Type *DataType) const
Return true if the target supports masked expand load.
bool shouldMaximizeVectorBandwidth(bool OptSize) const
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:295
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Multiway switch.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isLegalNTStore(Type *DataType, Align Alignment) const
Return true if the target supports nontemporal store.
TargetTransformInfo Result
MaxMin_match< FCmpInst, LHS, RHS, ofmax_pred_ty > m_OrdFMax(const LHS &L, const RHS &R)
Match an 'ordered' floating point maximum function.
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:575
LLVM Value Representation.
Definition: Value.h:74
unsigned getInliningThresholdMultiplier() const
bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
bool isLoweredToCall(const Function *F) const
Test whether calls to a function lower to actual program function calls.
bool enableInterleavedAccessVectorization() const
Enable matching of interleaved access groups.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Broadcast element 0 to all other elements.
static Optional< ReductionData > getReductionData(Instruction *I)
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type *> &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const
int getCFInstrCost(unsigned Opcode) const
bool shouldFavorBackedgeIndex(const Loop *L) const
Return true if LSR should make efforts to generate indexed addressing modes that operate across loop iterations.
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
Return true if the target can save a compare for loop count, for example hardware loop saves a compare.
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
Return true if it is faster to check if a floating-point value is NaN (or not-NaN) versus a comparison against a constant FP zero value.
bool prefersVectorizedAddressing() const
Return true if the target doesn't mind addresses in vectors.
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF) const
print Print MemDeps of function
void getUnrollingPreferences(Loop *L, ScalarEvolution &, UnrollingPreferences &UP) const
Get target-customized preferences for the generic loop unrolling transformation.
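A minimal sketch of how an unroller-style client might consult these preferences; L, SE and TTI are assumed to be in scope:
  // Fetch the target's unrolling preferences, then honour them.
  TargetTransformInfo::UnrollingPreferences UP;
  TTI.getUnrollingPreferences(L, SE, UP);
  if (UP.Partial && UP.MaxCount > 1) {
    // Partial unrolling up to UP.MaxCount is acceptable to the target.
  }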
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:159
OperandValueKind
Additional information about an operand&#39;s possible values.
A container for analyses that lazily runs them and caches their results.
unsigned getMinimumVF(unsigned ElemWidth) const
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const
const SCEV * getExitCount(const Loop *L, BasicBlock *ExitingBlock)
Return the number of times the backedge executes before the given exit would be taken; if not exactly computable, return SCEVCouldNotCompute.
This pass exposes codegen information to IR-level passes.
TargetTransformInfo & getTTI(const Function &F)
CacheLevel
The possible cache levels.
void perform(LoopInfo *LI)
Traverse the loop blocks and store the DFS result.
Definition: LoopIterator.h:180
bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, DominatorTree &DT, bool ForceNestedLoop=false, bool ForceHardwareLoopPHI=false)
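A minimal sketch of the two-step check used before forming a hardware loop; L, SE, LI and DT are assumed to be in scope:
  HardwareLoopInfo HWLoopInfo(L);
  // First make sure the CFG is analyzable, then test the candidate conditions.
  if (HWLoopInfo.canAnalyze(LI) &&
      HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT)) {
    // The loop is a structural candidate for a hardware loop.
  }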
int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr) const
VectorType * getType() const
Overload to return most specific vector type.
unsigned getFlatAddressSpace() const
Returns the address space ID for a target's 'flat' address space.
Information about a load/store intrinsic defined by the target.
static OperandValueKind getOperandInfo(Value *V, OperandValueProperties &OpProps)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
A special type used by analysis passes to provide an address that identifies that particular analysis pass type.
Definition: PassManager.h:71
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm, bool IsUnsigned) const
llvm::Optional< unsigned > getCacheSize(CacheLevel Level) const
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:43
This class represents a constant integer value.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of single source vector with any shuffle mask.