LLVM 17.0.0git
TargetTransformInfoImpl.h
1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
20#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/Operator.h"
25#include <optional>
26#include <utility>
27
28namespace llvm {
29
30class Function;
31
32/// Base class for use as a mix-in that aids implementing
33/// a TargetTransformInfo-compatible class.
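///
/// A minimal usage sketch; this is hypothetical and only for illustration
/// (the MyTargetTTIImpl name and the immediate range are assumptions, and
/// in-tree targets normally derive from TargetTransformInfoImplCRTPBase or
/// BasicTTIImplBase rather than using this class directly):
/// \code
///   class MyTargetTTIImpl
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL)
///         : TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>(DL) {}
///     // Override only the hooks this target cares about; every other query
///     // falls back to the conservative defaults defined below.
///     bool isLegalAddImmediate(int64_t Imm) const {
///       return Imm >= -4096 && Imm < 4096;
///     }
///   };
/// \endcode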
34class TargetTransformInfoImplBase {
35protected:
36 typedef TargetTransformInfo TTI;
37
38 const DataLayout &DL;
39
40 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
41
42public:
43 // Provide value semantics. MSVC requires that we spell all of these out.
44 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
45 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
46
47 const DataLayout &getDataLayout() const { return DL; }
48
49 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
50 ArrayRef<const Value *> Operands,
51 TTI::TargetCostKind CostKind) const {
52 // In the basic model, we just assume that all-constant GEPs will be folded
53 // into their uses via addressing modes.
54 for (const Value *Operand : Operands)
55 if (!isa<Constant>(Operand))
56 return TTI::TCC_Basic;
57
58 return TTI::TCC_Free;
59 }
60
61 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
62 unsigned &JTSize,
63 ProfileSummaryInfo *PSI,
64 BlockFrequencyInfo *BFI) const {
65 (void)PSI;
66 (void)BFI;
67 JTSize = 0;
68 return SI.getNumCases();
69 }
70
71 unsigned getInliningThresholdMultiplier() const { return 1; }
72 unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
73
74 int getInlinerVectorBonusPercent() const { return 150; }
75
76 InstructionCost getMemcpyCost(const Instruction *I) const {
77 return TTI::TCC_Expensive;
78 }
79
80 // Although this default value is arbitrary, it is not random. It is assumed
81 // that a condition that evaluates the same way by a higher percentage than
82 // this is best represented as control flow. Therefore, the default value N
83 // should be set such that the win from N% correct executions is greater than
84 // the loss from (100 - N)% mispredicted executions for the majority of
85 // intended targets.
86 BranchProbability getPredictableBranchThreshold() const {
87 return BranchProbability(99, 100);
88 }
89
90 bool hasBranchDivergence() const { return false; }
91
92 bool useGPUDivergenceAnalysis() const { return false; }
93
94 bool isSourceOfDivergence(const Value *V) const { return false; }
95
96 bool isAlwaysUniform(const Value *V) const { return false; }
97
98 unsigned getFlatAddressSpace() const { return -1; }
99
101 Intrinsic::ID IID) const {
102 return false;
103 }
104
105 bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
107 return AS == 0;
108 };
109
110 unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
111
112 bool isSingleThreaded() const { return false; }
113
114 std::pair<const Value *, unsigned>
116 return std::make_pair(nullptr, -1);
117 }
118
120 Value *NewV) const {
121 return nullptr;
122 }
123
124 bool isLoweredToCall(const Function *F) const {
125 assert(F && "A concrete function must be provided to this routine.");
126
127 // FIXME: These should almost certainly not be handled here, and instead
128 // handled with the help of TLI or the target itself. This was largely
129 // ported from existing analysis heuristics here so that such refactorings
130 // can take place in the future.
131
132 if (F->isIntrinsic())
133 return false;
134
135 if (F->hasLocalLinkage() || !F->hasName())
136 return true;
137
138 StringRef Name = F->getName();
139
140 // These will all likely lower to a single selection DAG node.
141 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
142 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
143 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
144 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
145 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
146 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
147 return false;
148
149 // These are all likely to be optimized into something smaller.
150 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
151 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
152 Name == "floorf" || Name == "ceil" || Name == "round" ||
153 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
154 Name == "llabs")
155 return false;
156
157 return true;
158 }
159
162 HardwareLoopInfo &HWLoopInfo) const {
163 return false;
164 }
165
168 DominatorTree *DT,
170 InterleavedAccessInfo *IAI) const {
171 return false;
172 }
173
176 }
177
178 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
179 IntrinsicInst &II) const {
180 return std::nullopt;
181 }
182
183 std::optional<Value *>
185 APInt DemandedMask, KnownBits &Known,
186 bool &KnownBitsComputed) const {
187 return std::nullopt;
188 }
189
191 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
192 APInt &UndefElts2, APInt &UndefElts3,
193 std::function<void(Instruction *, unsigned, APInt, APInt &)>
194 SimplifyAndSetOp) const {
195 return std::nullopt;
196 }
197
200 OptimizationRemarkEmitter *) const {}
201
203 TTI::PeelingPreferences &) const {}
204
205 bool isLegalAddImmediate(int64_t Imm) const { return false; }
206
207 bool isLegalICmpImmediate(int64_t Imm) const { return false; }
208
209 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
210 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
211 Instruction *I = nullptr) const {
212 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
213 // taken from the implementation of LSR.
214 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
215 }
216
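  // Compare LSR candidate costs lexicographically: number of registers first,
  // then AddRec cost, IV multiplies, base adds, scale cost, immediate cost,
  // and finally setup cost.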
217 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
218 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
219 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
220 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
221 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
222 }
223
224 bool isNumRegsMajorCostOfLSR() const { return true; }
225
226 bool isProfitableLSRChainElement(Instruction *I) const { return false; }
227
228 bool canMacroFuseCmp() const { return false; }
229
232 TargetLibraryInfo *LibInfo) const {
233 return false;
234 }
235
238 return TTI::AMK_None;
239 }
240
241 bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
242 return false;
243 }
244
245 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
246 return false;
247 }
248
249 bool isLegalNTStore(Type *DataType, Align Alignment) const {
250 // By default, assume nontemporal memory stores are available for stores
251 // that are aligned and have a size that is a power of 2.
252 unsigned DataSize = DL.getTypeStoreSize(DataType);
253 return Alignment >= DataSize && isPowerOf2_32(DataSize);
254 }
255
256 bool isLegalNTLoad(Type *DataType, Align Alignment) const {
257 // By default, assume nontemporal memory loads are available for loads that
258 // are aligned and have a size that is a power of 2.
259 unsigned DataSize = DL.getTypeStoreSize(DataType);
260 return Alignment >= DataSize && isPowerOf2_32(DataSize);
261 }
262
263 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
264 return false;
265 }
267 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
268 return false;
269 }
270
271 bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
272 return false;
273 }
274
275 bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
276 return false;
277 }
278
280 Align Alignment) const {
281 return false;
282 }
283
284 bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
285
286 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
287 const SmallBitVector &OpcodeMask) const {
288 return false;
289 }
290
291 bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
292
293 bool enableOrderedReductions() const { return false; }
294
295 bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
296
297 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
298 return false;
299 }
300
301 bool prefersVectorizedAddressing() const { return true; }
302
304 int64_t BaseOffset, bool HasBaseReg,
305 int64_t Scale,
306 unsigned AddrSpace) const {
307 // Guess that all legal addressing modes are free.
308 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
309 AddrSpace))
310 return 0;
311 return -1;
312 }
313
314 bool LSRWithInstrQueries() const { return false; }
315
316 bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
317
318 bool isProfitableToHoist(Instruction *I) const { return true; }
319
320 bool useAA() const { return false; }
321
322 bool isTypeLegal(Type *Ty) const { return false; }
323
324 unsigned getRegUsageForType(Type *Ty) const { return 1; }
325
326 bool shouldBuildLookupTables() const { return true; }
327
328 bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
329
330 bool shouldBuildRelLookupTables() const { return false; }
331
332 bool useColdCCForColdCall(Function &F) const { return false; }
333
335 const APInt &DemandedElts,
336 bool Insert, bool Extract,
338 return 0;
339 }
340
345 return 0;
346 }
347
348 bool supportsEfficientVectorElementLoadStore() const { return false; }
349
350 bool supportsTailCalls() const { return true; }
351
352 bool supportsTailCallFor(const CallBase *CB) const {
353 return supportsTailCalls();
354 }
355
356 bool enableAggressiveInterleaving(bool LoopHasReductions) const {
357 return false;
358 }
359
361 bool IsZeroCmp) const {
362 return {};
363 }
364
365 bool enableSelectOptimize() const { return true; }
366
367 bool enableInterleavedAccessVectorization() const { return false; }
368
369 bool enableMaskedInterleavedAccessVectorization() const { return false; }
370
371 bool isFPVectorizationPotentiallyUnsafe() const { return false; }
372
374 unsigned AddressSpace, Align Alignment,
375 unsigned *Fast) const {
376 return false;
377 }
378
379 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
380 return TTI::PSK_Software;
381 }
382
383 bool haveFastSqrt(Type *Ty) const { return false; }
384
385 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
386
387 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
388
391 }
392
393 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
394 const APInt &Imm, Type *Ty) const {
395 return 0;
396 }
397
400 return TTI::TCC_Basic;
401 }
402
403 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
404 const APInt &Imm, Type *Ty,
406 Instruction *Inst = nullptr) const {
407 return TTI::TCC_Free;
408 }
409
411 const APInt &Imm, Type *Ty,
413 return TTI::TCC_Free;
414 }
415
416 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
417
418 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
419 return Vector ? 1 : 0;
420 };
421
422 const char *getRegisterClassName(unsigned ClassID) const {
423 switch (ClassID) {
424 default:
425 return "Generic::Unknown Register Class";
426 case 0:
427 return "Generic::ScalarRC";
428 case 1:
429 return "Generic::VectorRC";
430 }
431 }
432
434 return TypeSize::getFixed(32);
435 }
436
437 unsigned getMinVectorRegisterBitWidth() const { return 128; }
438
439 std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
440 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
441
442 bool
444 return false;
445 }
446
447 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
448 return ElementCount::get(0, IsScalable);
449 }
450
451 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
452 unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
453
455 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
456 AllowPromotionWithoutCommonHeader = false;
457 return false;
458 }
459
460 unsigned getCacheLineSize() const { return 0; }
461 std::optional<unsigned>
462 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
463 switch (Level) {
464 case TargetTransformInfo::CacheLevel::L1D:
465 [[fallthrough]];
466 case TargetTransformInfo::CacheLevel::L2D:
467 return std::nullopt;
468 }
469 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
470 }
471
472 std::optional<unsigned>
473 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
474 switch (Level) {
475 case TargetTransformInfo::CacheLevel::L1D:
476 [[fallthrough]];
477 case TargetTransformInfo::CacheLevel::L2D:
478 return std::nullopt;
479 }
480
481 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
482 }
483
484 unsigned getPrefetchDistance() const { return 0; }
485 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
486 unsigned NumStridedMemAccesses,
487 unsigned NumPrefetches, bool HasCall) const {
488 return 1;
489 }
490 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
491 bool enableWritePrefetching() const { return false; }
492 bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
493
494 unsigned getMaxInterleaveFactor(unsigned VF) const { return 1; }
495
497 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
500 const Instruction *CxtI = nullptr) const {
501 // FIXME: A number of transformation tests seem to require these values
502 // which seems a little odd given how arbitrary they are.
503 switch (Opcode) {
504 default:
505 break;
506 case Instruction::FDiv:
507 case Instruction::FRem:
508 case Instruction::SDiv:
509 case Instruction::SRem:
510 case Instruction::UDiv:
511 case Instruction::URem:
512 // FIXME: Unlikely to be true for CodeSize.
513 return TTI::TCC_Expensive;
514 }
515
516 // Assume a 3-cycle latency for floating-point arithmetic ops.
518 if (Ty->getScalarType()->isFloatingPointTy())
519 return 3;
520
521 return 1;
522 }
523
527 ArrayRef<const Value *> Args = std::nullopt) const {
528 return 1;
529 }
530
531 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
534 const Instruction *I) const {
535 switch (Opcode) {
536 default:
537 break;
538 case Instruction::IntToPtr: {
539 unsigned SrcSize = Src->getScalarSizeInBits();
540 if (DL.isLegalInteger(SrcSize) &&
541 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
542 return 0;
543 break;
544 }
545 case Instruction::PtrToInt: {
546 unsigned DstSize = Dst->getScalarSizeInBits();
547 if (DL.isLegalInteger(DstSize) &&
548 DstSize >= DL.getPointerTypeSizeInBits(Src))
549 return 0;
550 break;
551 }
552 case Instruction::BitCast:
553 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
554 // Identity and pointer-to-pointer casts are free.
555 return 0;
556 break;
557 case Instruction::Trunc: {
558 // trunc to a native type is free (assuming the target has compare and
559 // shift-right of the same width).
560 TypeSize DstSize = DL.getTypeSizeInBits(Dst);
561 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
562 return 0;
563 break;
564 }
565 }
566 return 1;
567 }
568
570 VectorType *VecTy,
571 unsigned Index) const {
572 return 1;
573 }
574
576 const Instruction *I = nullptr) const {
577 // A phi would be free, unless we're costing the throughput because it
578 // will require a register.
579 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
580 return 0;
581 return 1;
582 }
583
584 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
585 CmpInst::Predicate VecPred,
587 const Instruction *I) const {
588 return 1;
589 }
590
593 unsigned Index, Value *Op0,
594 Value *Op1) const {
595 return 1;
596 }
597
600 unsigned Index) const {
601 return 1;
602 }
603
604 unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
605 const APInt &DemandedDstElts,
607 return 1;
608 }
609
610 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
611 unsigned AddressSpace,
614 const Instruction *I) const {
615 return 1;
616 }
617
618 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
619 unsigned AddressSpace,
621 const Instruction *I) const {
622 return 1;
623 }
624
626 Align Alignment, unsigned AddressSpace,
628 return 1;
629 }
630
632 const Value *Ptr, bool VariableMask,
633 Align Alignment,
635 const Instruction *I = nullptr) const {
636 return 1;
637 }
638
640 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
641 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
642 bool UseMaskForCond, bool UseMaskForGaps) const {
643 return 1;
644 }
645
648 switch (ICA.getID()) {
649 default:
650 break;
651 case Intrinsic::annotation:
652 case Intrinsic::assume:
653 case Intrinsic::sideeffect:
654 case Intrinsic::pseudoprobe:
655 case Intrinsic::arithmetic_fence:
656 case Intrinsic::dbg_declare:
657 case Intrinsic::dbg_value:
658 case Intrinsic::dbg_label:
659 case Intrinsic::invariant_start:
660 case Intrinsic::invariant_end:
661 case Intrinsic::launder_invariant_group:
662 case Intrinsic::strip_invariant_group:
663 case Intrinsic::is_constant:
664 case Intrinsic::lifetime_start:
665 case Intrinsic::lifetime_end:
666 case Intrinsic::experimental_noalias_scope_decl:
667 case Intrinsic::objectsize:
668 case Intrinsic::ptr_annotation:
669 case Intrinsic::var_annotation:
670 case Intrinsic::experimental_gc_result:
671 case Intrinsic::experimental_gc_relocate:
672 case Intrinsic::coro_alloc:
673 case Intrinsic::coro_begin:
674 case Intrinsic::coro_free:
675 case Intrinsic::coro_end:
676 case Intrinsic::coro_frame:
677 case Intrinsic::coro_size:
678 case Intrinsic::coro_align:
679 case Intrinsic::coro_suspend:
680 case Intrinsic::coro_subfn_addr:
681 case Intrinsic::threadlocal_address:
682 // These intrinsics don't actually represent code after lowering.
683 return 0;
684 }
685 return 1;
686 }
687
691 return 1;
692 }
693
694 // Assume that we have a register of the right size for the type.
695 unsigned getNumberOfParts(Type *Tp) const { return 1; }
696
698 const SCEV *) const {
699 return 0;
700 }
701
703 std::optional<FastMathFlags> FMF,
704 TTI::TargetCostKind) const {
705 return 1;
706 }
707
709 TTI::TargetCostKind) const {
710 return 1;
711 }
712
713 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
714 Type *ResTy, VectorType *Ty,
715 std::optional<FastMathFlags> FMF,
717 return 1;
718 }
719
721 VectorType *Ty,
723 return 1;
724 }
725
727 return 0;
728 }
729
731 return false;
732 }
733
735 // Note for overrides: You must ensure for all element unordered-atomic
736 // memory intrinsics that all power-of-2 element sizes up to, and
737 // including, the return value of this method have a corresponding
738 // runtime lib call. These runtime lib call definitions can be found
739 // in RuntimeLibcalls.h
740 return 0;
741 }
742
744 Type *ExpectedType) const {
745 return nullptr;
746 }
747
748 Type *
750 unsigned SrcAddrSpace, unsigned DestAddrSpace,
751 unsigned SrcAlign, unsigned DestAlign,
752 std::optional<uint32_t> AtomicElementSize) const {
753 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
755 }
756
758 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
759 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
760 unsigned SrcAlign, unsigned DestAlign,
761 std::optional<uint32_t> AtomicCpySize) const {
762 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
763 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
764 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
765 OpsOut.push_back(OpType);
766 }
767
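  // Conservative default: a callee is inline-compatible with its caller only
  // when both functions have identical "target-cpu" and "target-features"
  // attributes.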
768 bool areInlineCompatible(const Function *Caller,
769 const Function *Callee) const {
770 return (Caller->getFnAttribute("target-cpu") ==
771 Callee->getFnAttribute("target-cpu")) &&
772 (Caller->getFnAttribute("target-features") ==
773 Callee->getFnAttribute("target-features"));
774 }
775
776 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
777 const ArrayRef<Type *> &Types) const {
778 return (Caller->getFnAttribute("target-cpu") ==
779 Callee->getFnAttribute("target-cpu")) &&
780 (Caller->getFnAttribute("target-features") ==
781 Callee->getFnAttribute("target-features"));
782 }
783
785 const DataLayout &DL) const {
786 return false;
787 }
788
790 const DataLayout &DL) const {
791 return false;
792 }
793
794 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
795
796 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
797
798 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
799
800 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
801 unsigned AddrSpace) const {
802 return true;
803 }
804
805 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
806 unsigned AddrSpace) const {
807 return true;
808 }
809
811 ElementCount VF) const {
812 return true;
813 }
814
815 bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
816
817 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
818 unsigned ChainSizeInBytes,
819 VectorType *VecTy) const {
820 return VF;
821 }
822
823 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
824 unsigned ChainSizeInBytes,
825 VectorType *VecTy) const {
826 return VF;
827 }
828
829 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
830 TTI::ReductionFlags Flags) const {
831 return false;
832 }
833
834 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
835 TTI::ReductionFlags Flags) const {
836 return false;
837 }
838
840 return true;
841 }
842
843 bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
844
845 unsigned getGISelRematGlobalCost() const { return 1; }
846
847 unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
848
849 bool supportsScalableVectors() const { return false; }
850
851 bool enableScalableVectorization() const { return false; }
852
853 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
854 Align Alignment) const {
855 return false;
856 }
857
862 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
863 }
864
865protected:
866 // Obtain the minimum required size to hold the value (without the sign
867 // bit). For a vector, it returns the minimum required size of one element.
868 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
869 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
870 const auto *VectorValue = cast<Constant>(Val);
871
872 // For a vector we need to pick the maximum of the minimum required
873 // sizes of its elements.
874 auto *VT = cast<FixedVectorType>(Val->getType());
875
876 // Assume unsigned elements
877 isSigned = false;
878
879 // The max required size is the size of the vector element type
880 unsigned MaxRequiredSize =
881 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
882
883 unsigned MinRequiredSize = 0;
884 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
885 if (auto *IntElement =
886 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
887 bool signedElement = IntElement->getValue().isNegative();
888 // Get the element min required size.
889 unsigned ElementMinRequiredSize =
890 IntElement->getValue().getMinSignedBits() - 1;
892 // If one element is signed then the whole vector is considered signed.
892 isSigned |= signedElement;
893 // Save the max required bit size between all the elements.
894 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
895 } else {
896 // not an int constant element
897 return MaxRequiredSize;
898 }
899 }
900 return MinRequiredSize;
901 }
902
903 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
904 isSigned = CI->getValue().isNegative();
905 return CI->getValue().getMinSignedBits() - 1;
906 }
907
908 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
909 isSigned = true;
910 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
911 }
912
913 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
914 isSigned = false;
915 return Cast->getSrcTy()->getScalarSizeInBits();
916 }
917
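    // Otherwise fall back to the value's full scalar bit width and treat it
    // as unsigned.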
918 isSigned = false;
919 return Val->getType()->getScalarSizeInBits();
920 }
921
922 bool isStridedAccess(const SCEV *Ptr) const {
923 return Ptr && isa<SCEVAddRecExpr>(Ptr);
924 }
925
927 const SCEV *Ptr) const {
928 if (!isStridedAccess(Ptr))
929 return nullptr;
930 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
931 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
932 }
933
935 int64_t MergeDistance) const {
936 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
937 if (!Step)
938 return false;
939 APInt StrideVal = Step->getAPInt();
940 if (StrideVal.getBitWidth() > 64)
941 return false;
942 // FIXME: Need to take absolute value for negative stride case.
943 return StrideVal.getSExtValue() < MergeDistance;
944 }
945};
946
947/// CRTP base class for use as a mix-in that aids implementing
948/// a TargetTransformInfo-compatible class.
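///
/// The hooks below dispatch through static_cast<T *>(this) so that overrides
/// provided by the most-derived TTI implementation are used when computing
/// costs.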
949template <typename T>
950class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
951private:
952 typedef TargetTransformInfoImplBase BaseT;
953
954protected:
955 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
956
957public:
958 using BaseT::getGEPCost;
959
960 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
961 ArrayRef<const Value *> Operands,
962 TTI::TargetCostKind CostKind) {
963 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
964 assert(cast<PointerType>(Ptr->getType()->getScalarType())
965 ->isOpaqueOrPointeeTypeMatches(PointeeType) &&
966 "explicit pointee type doesn't match operand's pointee type");
967 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
968 bool HasBaseReg = (BaseGV == nullptr);
969
970 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
971 APInt BaseOffset(PtrSizeBits, 0);
972 int64_t Scale = 0;
973
974 auto GTI = gep_type_begin(PointeeType, Operands);
975 Type *TargetType = nullptr;
976
977 // Handle the case where the GEP instruction has only the base pointer
978 // operand, in which case TargetType remains a nullptr.
979 if (Operands.empty())
980 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
981
982 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
983 TargetType = GTI.getIndexedType();
984 // We assume that the cost of a scalar GEP with a constant index and the
985 // cost of a vector GEP with a splat constant index are the same.
986 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
987 if (!ConstIdx)
988 if (auto Splat = getSplatValue(*I))
989 ConstIdx = dyn_cast<ConstantInt>(Splat);
990 if (StructType *STy = GTI.getStructTypeOrNull()) {
991 // For structures, the index is always a splat or scalar constant.
992 assert(ConstIdx && "Unexpected GEP index");
993 uint64_t Field = ConstIdx->getZExtValue();
994 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
995 } else {
996 // If this operand is a scalable type, bail out early.
997 // TODO: handle scalable vectors
998 if (isa<ScalableVectorType>(TargetType))
999 return TTI::TCC_Basic;
1000 int64_t ElementSize =
1001 DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue();
1002 if (ConstIdx) {
1003 BaseOffset +=
1004 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1005 } else {
1006 // Needs scale register.
1007 if (Scale != 0)
1008 // No addressing mode takes two scale registers.
1009 return TTI::TCC_Basic;
1010 Scale = ElementSize;
1011 }
1012 }
1013 }
1014
1015 if (static_cast<T *>(this)->isLegalAddressingMode(
1016 TargetType, const_cast<GlobalValue *>(BaseGV),
1017 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
1018 Ptr->getType()->getPointerAddressSpace()))
1019 return TTI::TCC_Free;
1020 return TTI::TCC_Basic;
1021 }
1022
1023 InstructionCost getInstructionCost(const User *U,
1024 ArrayRef<const Value *> Operands,
1025 TTI::TargetCostKind CostKind) {
1026 using namespace llvm::PatternMatch;
1027
1028 auto *TargetTTI = static_cast<T *>(this);
1029 // Handle non-intrinsic calls, invokes, and callbr.
1030 // FIXME: Unlikely to be true for anything but CodeSize.
1031 auto *CB = dyn_cast<CallBase>(U);
1032 if (CB && !isa<IntrinsicInst>(U)) {
1033 if (const Function *F = CB->getCalledFunction()) {
1034 if (!TargetTTI->isLoweredToCall(F))
1035 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1036
1037 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1038 }
1039 // For indirect or other calls, scale cost by number of arguments.
1040 return TTI::TCC_Basic * (CB->arg_size() + 1);
1041 }
1042
1043 Type *Ty = U->getType();
1044 unsigned Opcode = Operator::getOpcode(U);
1045 auto *I = dyn_cast<Instruction>(U);
1046 switch (Opcode) {
1047 default:
1048 break;
1049 case Instruction::Call: {
1050 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1051 auto *Intrinsic = cast<IntrinsicInst>(U);
1052 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1053 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1054 }
1055 case Instruction::Br:
1056 case Instruction::Ret:
1057 case Instruction::PHI:
1058 case Instruction::Switch:
1059 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1060 case Instruction::ExtractValue:
1061 case Instruction::Freeze:
1062 return TTI::TCC_Free;
1063 case Instruction::Alloca:
1064 if (cast<AllocaInst>(U)->isStaticAlloca())
1065 return TTI::TCC_Free;
1066 break;
1067 case Instruction::GetElementPtr: {
1068 const auto *GEP = cast<GEPOperator>(U);
1069 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1070 GEP->getPointerOperand(),
1071 Operands.drop_front(), CostKind);
1072 }
1073 case Instruction::Add:
1074 case Instruction::FAdd:
1075 case Instruction::Sub:
1076 case Instruction::FSub:
1077 case Instruction::Mul:
1078 case Instruction::FMul:
1079 case Instruction::UDiv:
1080 case Instruction::SDiv:
1081 case Instruction::FDiv:
1082 case Instruction::URem:
1083 case Instruction::SRem:
1084 case Instruction::FRem:
1085 case Instruction::Shl:
1086 case Instruction::LShr:
1087 case Instruction::AShr:
1088 case Instruction::And:
1089 case Instruction::Or:
1090 case Instruction::Xor:
1091 case Instruction::FNeg: {
1093 TTI::OperandValueInfo Op2Info;
1094 if (Opcode != Instruction::FNeg)
1095 Op2Info = TTI::getOperandInfo(U->getOperand(1));
1097 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1098 Op2Info, Operands, I);
1099 }
1100 case Instruction::IntToPtr:
1101 case Instruction::PtrToInt:
1102 case Instruction::SIToFP:
1103 case Instruction::UIToFP:
1104 case Instruction::FPToUI:
1105 case Instruction::FPToSI:
1106 case Instruction::Trunc:
1107 case Instruction::FPTrunc:
1108 case Instruction::BitCast:
1109 case Instruction::FPExt:
1110 case Instruction::SExt:
1111 case Instruction::ZExt:
1112 case Instruction::AddrSpaceCast: {
1113 Type *OpTy = U->getOperand(0)->getType();
1114 return TargetTTI->getCastInstrCost(
1115 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1116 }
1117 case Instruction::Store: {
1118 auto *SI = cast<StoreInst>(U);
1119 Type *ValTy = U->getOperand(0)->getType();
1121 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1122 SI->getPointerAddressSpace(), CostKind,
1123 OpInfo, I);
1124 }
1125 case Instruction::Load: {
1126 // FIXME: Arbitrary cost which could come from the backend.
1128 return 4;
1129 auto *LI = cast<LoadInst>(U);
1130 Type *LoadType = U->getType();
1131 // If there is a non-register sized type, the cost estimation may expand
1132 // it to be several instructions to load into multiple registers on the
1133 // target. But, if the only use of the load is a trunc instruction to a
1134 // register sized type, the instruction selector can combine these
1135 // instructions to be a single load. So, in this case, we use the
1136 // destination type of the trunc instruction rather than the load to
1137 // accurately estimate the cost of this load instruction.
1138 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1139 !LoadType->isVectorTy()) {
1140 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1141 LoadType = TI->getDestTy();
1142 }
1143 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1145 {TTI::OK_AnyValue, TTI::OP_None}, I);
1146 }
1147 case Instruction::Select: {
1148 const Value *Op0, *Op1;
1149 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1150 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1151 // select x, y, false --> x & y
1152 // select x, true, y --> x | y
1153 const auto Op1Info = TTI::getOperandInfo(Op0);
1154 const auto Op2Info = TTI::getOperandInfo(Op1);
1155 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1156 Op1->getType()->getScalarSizeInBits() == 1);
1157
1159 return TargetTTI->getArithmeticInstrCost(
1160 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1161 CostKind, Op1Info, Op2Info, Operands, I);
1162 }
1163 Type *CondTy = U->getOperand(0)->getType();
1164 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1166 CostKind, I);
1167 }
1168 case Instruction::ICmp:
1169 case Instruction::FCmp: {
1170 Type *ValTy = U->getOperand(0)->getType();
1171 // TODO: Also handle ICmp/FCmp constant expressions.
1172 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1173 I ? cast<CmpInst>(I)->getPredicate()
1175 CostKind, I);
1176 }
1177 case Instruction::InsertElement: {
1178 auto *IE = dyn_cast<InsertElementInst>(U);
1179 if (!IE)
1180 return TTI::TCC_Basic; // FIXME
1181 unsigned Idx = -1;
1182 if (auto *CI = dyn_cast<ConstantInt>(IE->getOperand(2)))
1183 if (CI->getValue().getActiveBits() <= 32)
1184 Idx = CI->getZExtValue();
1185 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1186 }
1187 case Instruction::ShuffleVector: {
1188 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1189 if (!Shuffle)
1190 return TTI::TCC_Basic; // FIXME
1191
1192 auto *VecTy = cast<VectorType>(U->getType());
1193 auto *VecSrcTy = cast<VectorType>(U->getOperand(0)->getType());
1194 int NumSubElts, SubIndex;
1195
1196 if (Shuffle->changesLength()) {
1197 // Treat a 'subvector widening' as a free shuffle.
1198 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1199 return 0;
1200
1201 if (Shuffle->isExtractSubvectorMask(SubIndex))
1202 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1203 Shuffle->getShuffleMask(), CostKind,
1204 SubIndex, VecTy, Operands);
1205
1206 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1207 return TargetTTI->getShuffleCost(
1208 TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(),
1209 CostKind, SubIndex,
1210 FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1211 Operands);
1212
1213 int ReplicationFactor, VF;
1214 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1215 APInt DemandedDstElts =
1216 APInt::getNullValue(Shuffle->getShuffleMask().size());
1217 for (auto I : enumerate(Shuffle->getShuffleMask())) {
1218 if (I.value() != UndefMaskElem)
1219 DemandedDstElts.setBit(I.index());
1220 }
1221 return TargetTTI->getReplicationShuffleCost(
1222 VecSrcTy->getElementType(), ReplicationFactor, VF,
1223 DemandedDstElts, CostKind);
1224 }
1225
1226 return CostKind == TTI::TCK_RecipThroughput ? -1 : 1;
1227 }
1228
1229 if (Shuffle->isIdentity())
1230 return 0;
1231
1232 if (Shuffle->isReverse())
1233 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy,
1234 Shuffle->getShuffleMask(), CostKind, 0,
1235 nullptr, Operands);
1236
1237 if (Shuffle->isSelect())
1238 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy,
1239 Shuffle->getShuffleMask(), CostKind, 0,
1240 nullptr, Operands);
1241
1242 if (Shuffle->isTranspose())
1243 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy,
1244 Shuffle->getShuffleMask(), CostKind, 0,
1245 nullptr, Operands);
1246
1247 if (Shuffle->isZeroEltSplat())
1248 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy,
1249 Shuffle->getShuffleMask(), CostKind, 0,
1250 nullptr, Operands);
1251
1252 if (Shuffle->isSingleSource())
1253 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1254 Shuffle->getShuffleMask(), CostKind, 0,
1255 nullptr, Operands);
1256
1257 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1258 return TargetTTI->getShuffleCost(
1259 TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind,
1260 SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1261 Operands);
1262
1263 if (Shuffle->isSplice(SubIndex))
1264 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy,
1265 Shuffle->getShuffleMask(), CostKind,
1266 SubIndex, nullptr, Operands);
1267
1268 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy,
1269 Shuffle->getShuffleMask(), CostKind, 0,
1270 nullptr, Operands);
1271 }
1272 case Instruction::ExtractElement: {
1273 auto *EEI = dyn_cast<ExtractElementInst>(U);
1274 if (!EEI)
1275 return TTI::TCC_Basic; // FIXME
1276 unsigned Idx = -1;
1277 if (auto *CI = dyn_cast<ConstantInt>(EEI->getOperand(1)))
1278 if (CI->getValue().getActiveBits() <= 32)
1279 Idx = CI->getZExtValue();
1280 Type *DstTy = U->getOperand(0)->getType();
1281 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1282 }
1283 }
1284
1285 // By default, just classify everything as 'basic' (or -1, to signal that
1286 // we don't know the throughput cost).
1287 return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
1288 }
1289
1290 bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
1291 auto *TargetTTI = static_cast<T *>(this);
1292 SmallVector<const Value *, 4> Ops(I->operand_values());
1293 InstructionCost Cost = TargetTTI->getInstructionCost(
1294 I, Ops, TTI::TCK_SizeAndLatency);
1295 return Cost >= TTI::TCC_Expensive;
1296 }
1297};
1298} // namespace llvm
1299
1300#endif