LLVM 20.0.0git
TargetTransformInfoImpl.h
1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
17#include "llvm/Analysis/ScalarEvolutionExpressions.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Analysis/VectorUtils.h"
20#include "llvm/IR/DataLayout.h"
21#include "llvm/IR/GetElementPtrTypeIterator.h"
22#include "llvm/IR/IntrinsicInst.h"
23#include "llvm/IR/Operator.h"
24#include "llvm/IR/PatternMatch.h"
25#include <optional>
26#include <utility>
27
28namespace llvm {
29
30class Function;
31
32/// Base class for use as a mix-in that aids implementing
33/// a TargetTransformInfo-compatible class.
34class TargetTransformInfoImplBase {
35
36protected:
37 typedef TargetTransformInfo TTI;
38
39 const DataLayout &DL;
40
41 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
42
43public:
44 // Provide value semantics. MSVC requires that we spell all of these out.
45 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default;
46 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
47
48 const DataLayout &getDataLayout() const { return DL; }
49
50 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
51 ArrayRef<const Value *> Operands, Type *AccessType,
52 TTI::TargetCostKind CostKind) const {
53 // In the basic model, we just assume that all-constant GEPs will be folded
54 // into their uses via addressing modes.
55 for (const Value *Operand : Operands)
56 if (!isa<Constant>(Operand))
57 return TTI::TCC_Basic;
58
59 return TTI::TCC_Free;
60 }
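  // Illustrative note (not from the original source): under this basic model
  //   %p = getelementptr i32, ptr %base, i64 4      ; all-constant indices -> TCC_Free
  //   %q = getelementptr i32, ptr %base, i64 %idx   ; variable index       -> TCC_Basic
  // i.e. a GEP is only assumed to fold into its users' addressing modes when
  // every index operand is a Constant.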
61
62 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
63 unsigned &JTSize,
64 ProfileSummaryInfo *PSI,
65 BlockFrequencyInfo *BFI) const {
66 (void)PSI;
67 (void)BFI;
68 JTSize = 0;
69 return SI.getNumCases();
70 }
71
72 unsigned getInliningThresholdMultiplier() const { return 1; }
73 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; }
74 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const {
75 return 8;
76 }
77 unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; }
78 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const {
79 return 0;
80 };
81
82 int getInlinerVectorBonusPercent() const { return 150; }
83
84 InstructionCost getMemcpyCost(const Instruction *I) const {
85 return TTI::TCC_Expensive;
86 }
87
88 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const {
89 return 64;
90 }
91
92 // Although this default value is arbitrary, it is not random. It is assumed
93 // that a condition that evaluates the same way by a higher percentage than
94 // this is best represented as control flow. Therefore, the default value N
95 // should be set such that the win from N% correct executions is greater than
96 // the loss from (100 - N)% mispredicted executions for the majority of
97 // intended targets.
98 BranchProbability getPredictableBranchThreshold() const {
99 return BranchProbability(99, 100);
100 }
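  // Worked example (illustrative, with assumed latencies): at N = 99, a branch
  // predicted correctly 99% of the time costs roughly 0.99 * 1 + 0.01 * 20
  // = 1.19 cycles if we assume ~1 cycle when predicted and ~20 cycles per
  // misprediction; this is the kind of trade-off the threshold above is meant
  // to capture.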
101
102 InstructionCost getBranchMispredictPenalty() const { return 0; }
103
104 bool hasBranchDivergence(const Function *F = nullptr) const { return false; }
105
106 bool isSourceOfDivergence(const Value *V) const { return false; }
107
108 bool isAlwaysUniform(const Value *V) const { return false; }
109
110 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
111 return false;
112 }
113
114 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
115 return true;
116 }
117
118 unsigned getFlatAddressSpace() const { return -1; }
119
120 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
121 Intrinsic::ID IID) const {
122 return false;
123 }
124
125 bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
126 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
127 return AS == 0;
128 };
129
130 unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
131
132 bool isSingleThreaded() const { return false; }
133
134 std::pair<const Value *, unsigned>
135 getPredicatedAddrSpace(const Value *V) const {
136 return std::make_pair(nullptr, -1);
137 }
138
139 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
140 Value *NewV) const {
141 return nullptr;
142 }
143
144 bool isLoweredToCall(const Function *F) const {
145 assert(F && "A concrete function must be provided to this routine.");
146
147 // FIXME: These should almost certainly not be handled here, and instead
148 // handled with the help of TLI or the target itself. This was largely
149 // ported from existing analysis heuristics here so that such refactorings
150 // can take place in the future.
151
152 if (F->isIntrinsic())
153 return false;
154
155 if (F->hasLocalLinkage() || !F->hasName())
156 return true;
157
158 StringRef Name = F->getName();
159
160 // These will all likely lower to a single selection DAG node.
161 // clang-format off
162 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
163 Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
164 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
165 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
166 Name == "sin" || Name == "sinf" || Name == "sinl" ||
167 Name == "cos" || Name == "cosf" || Name == "cosl" ||
168 Name == "tan" || Name == "tanf" || Name == "tanl" ||
169 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
170 return false;
171 // clang-format on
172 // These are all likely to be optimized into something smaller.
173 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
174 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
175 Name == "floorf" || Name == "ceil" || Name == "round" ||
176 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
177 Name == "llabs")
178 return false;
179
180 return true;
181 }
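  // For illustration: with this default, a call to "sqrtf" or "fabsl" is not
  // treated as a real call (it is expected to become a single SelectionDAG
  // node), whereas a call to an arbitrary external function such as "printf"
  // still is.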
182
183 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
184 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
185 HardwareLoopInfo &HWLoopInfo) const {
186 return false;
187 }
188
189 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; }
190
191 TailFoldingStyle
192 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
193 return TailFoldingStyle::DataWithoutLaneMask;
194 }
195
196 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
197 IntrinsicInst &II) const {
198 return std::nullopt;
199 }
200
201 std::optional<Value *>
202 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
203 APInt DemandedMask, KnownBits &Known,
204 bool &KnownBitsComputed) const {
205 return std::nullopt;
206 }
207
208 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
209 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
210 APInt &UndefElts2, APInt &UndefElts3,
211 std::function<void(Instruction *, unsigned, APInt, APInt &)>
212 SimplifyAndSetOp) const {
213 return std::nullopt;
214 }
215
216 void getUnrollingPreferences(Loop *, ScalarEvolution &,
217 TTI::UnrollingPreferences &,
218 OptimizationRemarkEmitter *) const {}
219
220 void getPeelingPreferences(Loop *, ScalarEvolution &,
221 TTI::PeelingPreferences &) const {}
222
223 bool isLegalAddImmediate(int64_t Imm) const { return false; }
224
225 bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
226
227 bool isLegalICmpImmediate(int64_t Imm) const { return false; }
228
229 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
230 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
231 Instruction *I = nullptr,
232 int64_t ScalableOffset = 0) const {
233 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
234 // taken from the implementation of LSR.
235 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
236 }
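  // Consequences of this conservative default (illustrative):
  //   [reg]        BaseGV=null, BaseOffset=0, Scale=0 -> legal
  //   [reg + reg]  Scale=1                            -> legal
  //   [reg + 4]    BaseOffset=4                       -> rejected
  //   [@global]    BaseGV set                         -> rejected
  // Real targets override this hook to describe their actual addressing modes.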
237
238 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const {
239 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
240 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
241 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
242 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
243 }
244
245 bool isNumRegsMajorCostOfLSR() const { return true; }
246
247 bool shouldFoldTerminatingConditionAfterLSR() const { return false; }
248
249 bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
250
251 bool isProfitableLSRChainElement(Instruction *I) const { return false; }
252
253 bool canMacroFuseCmp() const { return false; }
254
255 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
256 DominatorTree *DT, AssumptionCache *AC,
257 TargetLibraryInfo *LibInfo) const {
258 return false;
259 }
260
261 TTI::AddressingModeKind
262 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const {
263 return TTI::AMK_None;
264 }
265
266 bool isLegalMaskedStore(Type *DataType, Align Alignment) const {
267 return false;
268 }
269
270 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const {
271 return false;
272 }
273
274 bool isLegalNTStore(Type *DataType, Align Alignment) const {
275 // By default, assume nontemporal memory stores are available for stores
276 // that are aligned and have a size that is a power of 2.
277 unsigned DataSize = DL.getTypeStoreSize(DataType);
278 return Alignment >= DataSize && isPowerOf2_32(DataSize);
279 }
280
281 bool isLegalNTLoad(Type *DataType, Align Alignment) const {
282 // By default, assume nontemporal memory loads are available for loads that
283 // are aligned and have a size that is a power of 2.
284 unsigned DataSize = DL.getTypeStoreSize(DataType);
285 return Alignment >= DataSize && isPowerOf2_32(DataSize);
286 }
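  // Example (illustrative) for the two defaults above: a nontemporal store or
  // load of <4 x i32> has a 16-byte store size, so it is accepted only when
  // Alignment >= 16; a 12-byte type such as <3 x i32> is always rejected
  // because 12 is not a power of 2.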
287
288 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const {
289 return false;
290 }
291
292 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
293 return false;
294 }
295
296 bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
297 return false;
298 }
299
300 bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const {
301 return false;
302 }
303
304 bool forceScalarizeMaskedScatter(VectorType *DataType,
305 Align Alignment) const {
306 return false;
307 }
308
309 bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const {
310 return false;
311 }
312
313 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
314 const SmallBitVector &OpcodeMask) const {
315 return false;
316 }
317
318 bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
319 return false;
320 }
321
322 bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
323 return false;
324 }
325
326 bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const {
327 return false;
328 }
329
330 bool enableOrderedReductions() const { return false; }
331
332 bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; }
333
334 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
335 return false;
336 }
337
338 bool prefersVectorizedAddressing() const { return true; }
339
340 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
341 StackOffset BaseOffset, bool HasBaseReg,
342 int64_t Scale,
343 unsigned AddrSpace) const {
344 // Guess that all legal addressing mode are free.
345 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
346 Scale, AddrSpace, /*I=*/nullptr,
347 BaseOffset.getScalable()))
348 return 0;
349 return -1;
350 }
351
352 bool LSRWithInstrQueries() const { return false; }
353
354 bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
355
356 bool isProfitableToHoist(Instruction *I) const { return true; }
357
358 bool useAA() const { return false; }
359
360 bool isTypeLegal(Type *Ty) const { return false; }
361
362 unsigned getRegUsageForType(Type *Ty) const { return 1; }
363
364 bool shouldBuildLookupTables() const { return true; }
365
366 bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; }
367
368 bool shouldBuildRelLookupTables() const { return false; }
369
370 bool useColdCCForColdCall(Function &F) const { return false; }
371
372 InstructionCost getScalarizationOverhead(VectorType *Ty,
373 const APInt &DemandedElts,
374 bool Insert, bool Extract,
375 TTI::TargetCostKind CostKind) const {
376 return 0;
377 }
378
379 InstructionCost
380 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
381 ArrayRef<Type *> Tys,
382 TTI::TargetCostKind CostKind) const {
383 return 0;
384 }
385
386 bool supportsEfficientVectorElementLoadStore() const { return false; }
387
388 bool supportsTailCalls() const { return true; }
389
390 bool enableAggressiveInterleaving(bool LoopHasReductions) const {
391 return false;
392 }
393
394 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
395 bool IsZeroCmp) const {
396 return {};
397 }
398
399 bool enableSelectOptimize() const { return true; }
400
401 bool shouldTreatInstructionLikeSelect(const Instruction *I) {
402 // If the select is a logical-and/logical-or then it is better treated as an
403 // and/or by the backend.
404 using namespace llvm::PatternMatch;
405 return isa<SelectInst>(I) &&
406 (match(I, m_LogicalAnd(m_Value(), m_Value())) ||
407 match(I, m_LogicalOr(m_Value(), m_Value())));
408 }
409
410 bool enableInterleavedAccessVectorization() const { return false; }
411
412 bool enableMaskedInterleavedAccessVectorization() const { return false; }
413
414 bool isFPVectorizationPotentiallyUnsafe() const { return false; }
415
416 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
417 unsigned AddressSpace, Align Alignment,
418 unsigned *Fast) const {
419 return false;
420 }
421
422 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const {
423 return TTI::PSK_Software;
424 }
425
426 bool haveFastSqrt(Type *Ty) const { return false; }
427
428 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; }
429
430 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
431
432 InstructionCost getFPOpCost(Type *Ty) const {
433 return TTI::TCC_Basic;
434 }
435
436 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
437 const APInt &Imm, Type *Ty) const {
438 return 0;
439 }
440
441 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
442 TTI::TargetCostKind CostKind) const {
443 return TTI::TCC_Basic;
444 }
445
446 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
447 const APInt &Imm, Type *Ty,
448 TTI::TargetCostKind CostKind,
449 Instruction *Inst = nullptr) const {
450 return TTI::TCC_Free;
451 }
452
453 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
454 const APInt &Imm, Type *Ty,
455 TTI::TargetCostKind CostKind) const {
456 return TTI::TCC_Free;
457 }
458
459 bool preferToKeepConstantsAttached(const Instruction &Inst,
460 const Function &Fn) const {
461 return false;
462 }
463
464 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
465 bool hasConditionalLoadStoreForType(Type *Ty) const { return false; }
466
467 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
468 return Vector ? 1 : 0;
469 };
470
471 const char *getRegisterClassName(unsigned ClassID) const {
472 switch (ClassID) {
473 default:
474 return "Generic::Unknown Register Class";
475 case 0:
476 return "Generic::ScalarRC";
477 case 1:
478 return "Generic::VectorRC";
479 }
480 }
481
482 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
483 return TypeSize::getFixed(32);
484 }
485
486 unsigned getMinVectorRegisterBitWidth() const { return 128; }
487
488 std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
489 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; }
490 bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
491
492 bool
493 shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const {
494 return false;
495 }
496
497 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
498 return ElementCount::get(0, IsScalable);
499 }
500
501 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; }
502 unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; }
503
504 bool shouldConsiderAddressTypePromotion(
505 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
506 AllowPromotionWithoutCommonHeader = false;
507 return false;
508 }
509
510 unsigned getCacheLineSize() const { return 0; }
511 std::optional<unsigned>
512 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
513 switch (Level) {
514 case TargetTransformInfo::CacheLevel::L1D:
515 [[fallthrough]];
516 case TargetTransformInfo::CacheLevel::L2D:
517 return std::nullopt;
518 }
519 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
520 }
521
522 std::optional<unsigned>
523 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
524 switch (Level) {
525 case TargetTransformInfo::CacheLevel::L1D:
526 [[fallthrough]];
527 case TargetTransformInfo::CacheLevel::L2D:
528 return std::nullopt;
529 }
530
531 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
532 }
533
534 std::optional<unsigned> getMinPageSize() const { return {}; }
535
536 unsigned getPrefetchDistance() const { return 0; }
537 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
538 unsigned NumStridedMemAccesses,
539 unsigned NumPrefetches, bool HasCall) const {
540 return 1;
541 }
542 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
543 bool enableWritePrefetching() const { return false; }
544 bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
545
546 unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
547
548 InstructionCost getArithmeticInstrCost(
549 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
550 TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info,
551 ArrayRef<const Value *> Args,
552 const Instruction *CxtI = nullptr) const {
553 // Widenable conditions will eventually lower into constants, so some
554 // operations with them will be trivially optimized away.
555 auto IsWidenableCondition = [](const Value *V) {
556 if (auto *II = dyn_cast<IntrinsicInst>(V))
557 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
558 return true;
559 return false;
560 };
561 // FIXME: A number of transformation tests seem to require these values
562 // which seems a little odd for how arbitrary they are.
563 switch (Opcode) {
564 default:
565 break;
566 case Instruction::FDiv:
567 case Instruction::FRem:
568 case Instruction::SDiv:
569 case Instruction::SRem:
570 case Instruction::UDiv:
571 case Instruction::URem:
572 // FIXME: Unlikely to be true for CodeSize.
573 return TTI::TCC_Expensive;
574 case Instruction::And:
575 case Instruction::Or:
576 if (any_of(Args, IsWidenableCondition))
577 return TTI::TCC_Free;
578 break;
579 }
580
581 // Assume a 3cy latency for fp arithmetic ops.
582 if (CostKind == TTI::TCK_Latency &&
583 Ty->getScalarType()->isFloatingPointTy())
584 return 3;
585
586 return 1;
587 }
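  // Rough costs produced by the default logic above (illustrative):
  //   udiv/srem i32                    -> TTI::TCC_Expensive (4)
  //   and i1 %c, %widenable_condition  -> TTI::TCC_Free
  //   fadd float                       -> 3 when costing latency, otherwise 1
  //   add i64                          -> 1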
588
589 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
590 unsigned Opcode1,
591 const SmallBitVector &OpcodeMask,
592 TTI::TargetCostKind CostKind) const {
593 return InstructionCost::getInvalid();
594 }
595
596 InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty,
597 ArrayRef<int> Mask,
598 TTI::TargetCostKind CostKind, int Index,
599 VectorType *SubTp,
600 ArrayRef<const Value *> Args = std::nullopt,
601 const Instruction *CxtI = nullptr) const {
602 return 1;
603 }
604
605 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
606 TTI::CastContextHint CCH,
607 TTI::TargetCostKind CostKind,
608 const Instruction *I) const {
609 switch (Opcode) {
610 default:
611 break;
612 case Instruction::IntToPtr: {
613 unsigned SrcSize = Src->getScalarSizeInBits();
614 if (DL.isLegalInteger(SrcSize) &&
615 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
616 return 0;
617 break;
618 }
619 case Instruction::PtrToInt: {
620 unsigned DstSize = Dst->getScalarSizeInBits();
621 if (DL.isLegalInteger(DstSize) &&
622 DstSize >= DL.getPointerTypeSizeInBits(Src))
623 return 0;
624 break;
625 }
626 case Instruction::BitCast:
627 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
628 // Identity and pointer-to-pointer casts are free.
629 return 0;
630 break;
631 case Instruction::Trunc: {
632 // trunc to a native type is free (assuming the target has compare and
633 // shift-right of the same width).
634 TypeSize DstSize = DL.getTypeSizeInBits(Dst);
635 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
636 return 0;
637 break;
638 }
639 }
640 return 1;
641 }
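  // Examples of casts this default treats as free (illustrative, assuming a
  // 64-bit DataLayout with legal i32/i64):
  //   ptrtoint ptr %p to i64  -> 0 (legal integer at least as wide as the pointer)
  //   trunc i64 %x to i32     -> 0 (truncation to a native type)
  //   bitcast ptr %p to ptr   -> 0 (pointer-to-pointer cast)
  //   sitofp i32 %x to float  -> 1 (falls through to the default)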
642
643 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
644 VectorType *VecTy,
645 unsigned Index) const {
646 return 1;
647 }
648
649 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
650 const Instruction *I = nullptr) const {
651 // A phi would be free, unless we're costing the throughput because it
652 // will require a register.
653 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
654 return 0;
655 return 1;
656 }
657
658 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
659 CmpInst::Predicate VecPred,
660 TTI::TargetCostKind CostKind,
661 const Instruction *I) const {
662 return 1;
663 }
664
665 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
666 TTI::TargetCostKind CostKind,
667 unsigned Index, Value *Op0,
668 Value *Op1) const {
669 return 1;
670 }
671
672 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
673 TTI::TargetCostKind CostKind,
674 unsigned Index) const {
675 return 1;
676 }
677
678 unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
679 const APInt &DemandedDstElts,
680 TTI::TargetCostKind CostKind) {
681 return 1;
682 }
683
684 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
685 unsigned AddressSpace,
686 TTI::TargetCostKind CostKind,
687 TTI::OperandValueInfo OpInfo,
688 const Instruction *I) const {
689 return 1;
690 }
691
692 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
693 unsigned AddressSpace,
694 TTI::TargetCostKind CostKind,
695 const Instruction *I) const {
696 return 1;
697 }
698
699 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
700 Align Alignment, unsigned AddressSpace,
701 TTI::TargetCostKind CostKind) const {
702 return 1;
703 }
704
705 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
706 const Value *Ptr, bool VariableMask,
707 Align Alignment,
708 TTI::TargetCostKind CostKind,
709 const Instruction *I = nullptr) const {
710 return 1;
711 }
712
713 InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy,
714 const Value *Ptr, bool VariableMask,
715 Align Alignment,
716 TTI::TargetCostKind CostKind,
717 const Instruction *I = nullptr) const {
718 return InstructionCost::getInvalid();
719 }
720
721 InstructionCost getInterleavedMemoryOpCost(
722 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
723 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
724 bool UseMaskForCond, bool UseMaskForGaps) const {
725 return 1;
726 }
727
728 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
729 TTI::TargetCostKind CostKind) const {
730 switch (ICA.getID()) {
731 default:
732 break;
733 case Intrinsic::experimental_vector_histogram_add:
734 // For now, we want explicit support from the target for histograms.
735 return InstructionCost::getInvalid();
736 case Intrinsic::allow_runtime_check:
737 case Intrinsic::allow_ubsan_check:
738 case Intrinsic::annotation:
739 case Intrinsic::assume:
740 case Intrinsic::sideeffect:
741 case Intrinsic::pseudoprobe:
742 case Intrinsic::arithmetic_fence:
743 case Intrinsic::dbg_assign:
744 case Intrinsic::dbg_declare:
745 case Intrinsic::dbg_value:
746 case Intrinsic::dbg_label:
747 case Intrinsic::invariant_start:
748 case Intrinsic::invariant_end:
749 case Intrinsic::launder_invariant_group:
750 case Intrinsic::strip_invariant_group:
751 case Intrinsic::is_constant:
752 case Intrinsic::lifetime_start:
753 case Intrinsic::lifetime_end:
754 case Intrinsic::experimental_noalias_scope_decl:
755 case Intrinsic::objectsize:
756 case Intrinsic::ptr_annotation:
757 case Intrinsic::var_annotation:
758 case Intrinsic::experimental_gc_result:
759 case Intrinsic::experimental_gc_relocate:
760 case Intrinsic::coro_alloc:
761 case Intrinsic::coro_begin:
762 case Intrinsic::coro_free:
763 case Intrinsic::coro_end:
764 case Intrinsic::coro_frame:
765 case Intrinsic::coro_size:
766 case Intrinsic::coro_align:
767 case Intrinsic::coro_suspend:
768 case Intrinsic::coro_subfn_addr:
769 case Intrinsic::threadlocal_address:
770 case Intrinsic::experimental_widenable_condition:
771 case Intrinsic::ssa_copy:
772 // These intrinsics don't actually represent code after lowering.
773 return 0;
774 }
775 return 1;
776 }
777
778 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
779 ArrayRef<Type *> Tys,
780 TTI::TargetCostKind CostKind) const {
781 return 1;
782 }
783
784 // Assume that we have a register of the right size for the type.
785 unsigned getNumberOfParts(Type *Tp) const { return 1; }
786
787 InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *,
788 const SCEV *) const {
789 return 0;
790 }
791
792 InstructionCost getArithmeticReductionCost(unsigned, VectorType *,
793 std::optional<FastMathFlags> FMF,
794 TTI::TargetCostKind) const {
795 return 1;
796 }
797
798 InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *,
799 FastMathFlags,
800 TTI::TargetCostKind) const {
801 return 1;
802 }
803
804 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned,
805 Type *ResTy, VectorType *Ty,
806 FastMathFlags FMF,
807 TTI::TargetCostKind CostKind) const {
808 return 1;
809 }
810
811 InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy,
812 VectorType *Ty,
813 TTI::TargetCostKind CostKind) const {
814 return 1;
815 }
816
817 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
818 return 0;
819 }
820
821 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const {
822 return false;
823 }
824
825 unsigned getAtomicMemIntrinsicMaxElementSize() const {
826 // Note for overrides: You must ensure for all element unordered-atomic
827 // memory intrinsics that all power-of-2 element sizes up to, and
828 // including, the return value of this method have a corresponding
829 // runtime lib call. These runtime lib call definitions can be found
830 // in RuntimeLibcalls.h
831 return 0;
832 }
833
834 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
835 Type *ExpectedType) const {
836 return nullptr;
837 }
838
839 Type *
840 getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
841 unsigned SrcAddrSpace, unsigned DestAddrSpace,
842 unsigned SrcAlign, unsigned DestAlign,
843 std::optional<uint32_t> AtomicElementSize) const {
844 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
845 : Type::getInt8Ty(Context);
846 }
847
848 void getMemcpyLoopResidualLoweringType(
849 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
850 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
851 unsigned SrcAlign, unsigned DestAlign,
852 std::optional<uint32_t> AtomicCpySize) const {
853 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1;
854 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
855 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
856 OpsOut.push_back(OpType);
857 }
858
859 bool areInlineCompatible(const Function *Caller,
860 const Function *Callee) const {
861 return (Caller->getFnAttribute("target-cpu") ==
862 Callee->getFnAttribute("target-cpu")) &&
863 (Caller->getFnAttribute("target-features") ==
864 Callee->getFnAttribute("target-features"));
865 }
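  // Illustrative example: under this exact-string default, a caller built with
  // "target-features"="+avx2,+avx512f" may not inline a callee built with
  // "target-features"="+avx2", even though the caller's features are a
  // superset; targets usually override this hook with a subset-based check.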
866
867 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
868 unsigned DefaultCallPenalty) const {
869 return DefaultCallPenalty;
870 }
871
872 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
873 const ArrayRef<Type *> &Types) const {
874 return (Caller->getFnAttribute("target-cpu") ==
875 Callee->getFnAttribute("target-cpu")) &&
876 (Caller->getFnAttribute("target-features") ==
877 Callee->getFnAttribute("target-features"));
878 }
879
880 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
881 const DataLayout &DL) const {
882 return false;
883 }
884
885 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
886 const DataLayout &DL) const {
887 return false;
888 }
889
890 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
891
892 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
893
894 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
895
896 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
897 unsigned AddrSpace) const {
898 return true;
899 }
900
901 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
902 unsigned AddrSpace) const {
903 return true;
904 }
905
906 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
907 ElementCount VF) const {
908 return true;
909 }
910
911 bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
912
913 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
914 unsigned ChainSizeInBytes,
915 VectorType *VecTy) const {
916 return VF;
917 }
918
919 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
920 unsigned ChainSizeInBytes,
921 VectorType *VecTy) const {
922 return VF;
923 }
924
925 bool preferFixedOverScalableIfEqualCost() const { return false; }
926
927 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
928 TTI::ReductionFlags Flags) const {
929 return false;
930 }
931
932 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
933 TTI::ReductionFlags Flags) const {
934 return false;
935 }
936
937 bool preferEpilogueVectorization() const {
938 return true;
939 }
940
941 bool shouldExpandReduction(const IntrinsicInst *II) const { return true; }
942
943 TTI::ReductionShuffle
944 getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const {
945 return TTI::ReductionShuffle::SplitHalf;
946 }
947
948 unsigned getGISelRematGlobalCost() const { return 1; }
949
950 unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
951
952 bool supportsScalableVectors() const { return false; }
953
954 bool enableScalableVectorization() const { return false; }
955
956 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
957 Align Alignment) const {
958 return false;
959 }
960
961 TargetTransformInfo::VPLegalization
962 getVPLegalizationStrategy(const VPIntrinsic &PI) const {
963 return TargetTransformInfo::VPLegalization(
964 /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard,
965 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert);
966 }
967
968 bool hasArmWideBranch(bool) const { return false; }
969
970 unsigned getMaxNumArgs() const { return UINT_MAX; }
971
972protected:
973 // Obtain the minimum required size to hold the value (without the sign)
974 // In case of a vector it returns the min required size for one element.
975 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
976 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
977 const auto *VectorValue = cast<Constant>(Val);
978
979 // In case of a vector we need to pick the max of the min required
980 // sizes of the individual elements
981 auto *VT = cast<FixedVectorType>(Val->getType());
982
983 // Assume unsigned elements
984 isSigned = false;
985
986 // The max required size is the size of the vector element type
987 unsigned MaxRequiredSize =
988 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
989
990 unsigned MinRequiredSize = 0;
991 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
992 if (auto *IntElement =
993 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
994 bool signedElement = IntElement->getValue().isNegative();
995 // Get the element min required size.
996 unsigned ElementMinRequiredSize =
997 IntElement->getValue().getSignificantBits() - 1;
998 // In case one element is signed then all the vector is signed.
999 isSigned |= signedElement;
1000 // Save the max required bit size between all the elements.
1001 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
1002 } else {
1003 // not an int constant element
1004 return MaxRequiredSize;
1005 }
1006 }
1007 return MinRequiredSize;
1008 }
1009
1010 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
1011 isSigned = CI->getValue().isNegative();
1012 return CI->getValue().getSignificantBits() - 1;
1013 }
1014
1015 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
1016 isSigned = true;
1017 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
1018 }
1019
1020 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
1021 isSigned = false;
1022 return Cast->getSrcTy()->getScalarSizeInBits();
1023 }
1024
1025 isSigned = false;
1026 return Val->getType()->getScalarSizeInBits();
1027 }
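  // Worked example (illustrative): for the constant vector <2 x i32> <i32 -3, i32 -3>
  // each element needs getSignificantBits() - 1 == 2 bits, so this returns 2
  // with isSigned set to true; for a value produced by a zext from i8 it
  // returns 8 with isSigned set to false.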
1028
1029 bool isStridedAccess(const SCEV *Ptr) const {
1030 return Ptr && isa<SCEVAddRecExpr>(Ptr);
1031 }
1032
1033 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
1034 const SCEV *Ptr) const {
1035 if (!isStridedAccess(Ptr))
1036 return nullptr;
1037 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
1038 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
1039 }
1040
1041 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
1042 int64_t MergeDistance) const {
1043 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1044 if (!Step)
1045 return false;
1046 APInt StrideVal = Step->getAPInt();
1047 if (StrideVal.getBitWidth() > 64)
1048 return false;
1049 // FIXME: Need to take absolute value for negative stride case.
1050 return StrideVal.getSExtValue() < MergeDistance;
1051 }
1052};
1053
1054/// CRTP base class for use as a mix-in that aids implementing
1055/// a TargetTransformInfo-compatible class.
1056template <typename T>
1057class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
1058private:
1059 typedef TargetTransformInfoImplBase BaseT;
1060
1061protected:
1062 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
1063
1064public:
1065 using BaseT::getGEPCost;
1066
1067 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1068 ArrayRef<const Value *> Operands, Type *AccessType,
1069 TTI::TargetCostKind CostKind) {
1070 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
1071 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
1072 bool HasBaseReg = (BaseGV == nullptr);
1073
1074 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
1075 APInt BaseOffset(PtrSizeBits, 0);
1076 int64_t Scale = 0;
1077
1078 auto GTI = gep_type_begin(PointeeType, Operands);
1079 Type *TargetType = nullptr;
1080
1081 // Handle the case where the GEP instruction has a single operand,
1082 // the basis, therefore TargetType is a nullptr.
1083 if (Operands.empty())
1084 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
1085
1086 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
1087 TargetType = GTI.getIndexedType();
1088 // We assume that the cost of Scalar GEP with constant index and the
1089 // cost of Vector GEP with splat constant index are the same.
1090 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
1091 if (!ConstIdx)
1092 if (auto Splat = getSplatValue(*I))
1093 ConstIdx = dyn_cast<ConstantInt>(Splat);
1094 if (StructType *STy = GTI.getStructTypeOrNull()) {
1095 // For structures the index is always splat or scalar constant
1096 assert(ConstIdx && "Unexpected GEP index");
1097 uint64_t Field = ConstIdx->getZExtValue();
1098 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
1099 } else {
1100 // If this operand is a scalable type, bail out early.
1101 // TODO: Make isLegalAddressingMode TypeSize aware.
1102 if (TargetType->isScalableTy())
1103 return TTI::TCC_Basic;
1104 int64_t ElementSize =
1105 GTI.getSequentialElementStride(DL).getFixedValue();
1106 if (ConstIdx) {
1107 BaseOffset +=
1108 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1109 } else {
1110 // Needs scale register.
1111 if (Scale != 0)
1112 // No addressing mode takes two scale registers.
1113 return TTI::TCC_Basic;
1114 Scale = ElementSize;
1115 }
1116 }
1117 }
1118
1119 // If we haven't been provided a hint, use the target type for now.
1120 //
1121 // TODO: Take a look at potentially removing this: This is *slightly* wrong
1122 // as it's possible to have a GEP with a foldable target type but a memory
1123 // access that isn't foldable. For example, this load isn't foldable on
1124 // RISC-V:
1125 //
1126 // %p = getelementptr i32, ptr %base, i32 42
1127 // %x = load <2 x i32>, ptr %p
1128 if (!AccessType)
1129 AccessType = TargetType;
1130
1131 // If the final address of the GEP is a legal addressing mode for the given
1132 // access type, then we can fold it into its users.
1133 if (static_cast<T *>(this)->isLegalAddressingMode(
1134 AccessType, const_cast<GlobalValue *>(BaseGV),
1135 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
1136 Ptr->getType()->getPointerAddressSpace()))
1137 return TTI::TCC_Free;
1138
1139 // TODO: Instead of returning TCC_Basic here, we should use
1140 // getArithmeticInstrCost. Or better yet, provide a hook to let the target
1141 // model it.
1142 return TTI::TCC_Basic;
1143 }
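  // Illustrative example: for
  //   %f = getelementptr inbounds { i32, i64 }, ptr %s, i32 0, i32 1
  // every index is constant, so the loop above accumulates a fixed BaseOffset
  // (8 with a typical 64-bit DataLayout) and needs no scale register; whether
  // this GEP ends up TCC_Free then depends only on isLegalAddressingMode for
  // the access type.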
1144
1145 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
1146 const Value *Base,
1147 const TTI::PointersChainInfo &Info,
1148 Type *AccessTy,
1149 TTI::TargetCostKind CostKind) {
1150 InstructionCost Cost = TTI::TCC_Free;
1151 // In the basic model we take into account GEP instructions only
1152 // (although an alloca instruction, a plain value, constants and/or constant
1153 // expressions, PHIs, bitcasts, or anything else allowed to be used as a
1154 // pointer may appear here). Typically, if Base is not a GEP instruction and
1155 // all the pointers are relative to the same base address, all the rest are
1156 // either GEP instructions, PHIs, bitcasts or constants. When we have the
1157 // same base, we just calculate the cost of each non-Base GEP as an ADD
1158 // operation if any of its indices is a non-constant.
1159 // If there are no known dependencies between the pointers, the cost is
1160 // calculated as a sum of the costs of the GEP instructions.
1161 for (const Value *V : Ptrs) {
1162 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1163 if (!GEP)
1164 continue;
1165 if (Info.isSameBase() && V != Base) {
1166 if (GEP->hasAllConstantIndices())
1167 continue;
1168 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1169 Instruction::Add, GEP->getType(), CostKind,
1170 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
1171 std::nullopt);
1172 } else {
1173 SmallVector<const Value *> Indices(GEP->indices());
1174 Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
1175 GEP->getPointerOperand(),
1176 Indices, AccessTy, CostKind);
1177 }
1178 }
1179 return Cost;
1180 }
1181
1182 InstructionCost getInstructionCost(const User *U,
1183 ArrayRef<const Value *> Operands,
1184 TTI::TargetCostKind CostKind) {
1185 using namespace llvm::PatternMatch;
1186
1187 auto *TargetTTI = static_cast<T *>(this);
1188 // Handle non-intrinsic calls, invokes, and callbr.
1189 // FIXME: Unlikely to be true for anything but CodeSize.
1190 auto *CB = dyn_cast<CallBase>(U);
1191 if (CB && !isa<IntrinsicInst>(U)) {
1192 if (const Function *F = CB->getCalledFunction()) {
1193 if (!TargetTTI->isLoweredToCall(F))
1194 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1195
1196 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1197 }
1198 // For indirect or other calls, scale cost by number of arguments.
1199 return TTI::TCC_Basic * (CB->arg_size() + 1);
1200 }
1201
1202 Type *Ty = U->getType();
1203 unsigned Opcode = Operator::getOpcode(U);
1204 auto *I = dyn_cast<Instruction>(U);
1205 switch (Opcode) {
1206 default:
1207 break;
1208 case Instruction::Call: {
1209 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1210 auto *Intrinsic = cast<IntrinsicInst>(U);
1211 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1212 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1213 }
1214 case Instruction::Br:
1215 case Instruction::Ret:
1216 case Instruction::PHI:
1217 case Instruction::Switch:
1218 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1219 case Instruction::ExtractValue:
1220 case Instruction::Freeze:
1221 return TTI::TCC_Free;
1222 case Instruction::Alloca:
1223 if (cast<AllocaInst>(U)->isStaticAlloca())
1224 return TTI::TCC_Free;
1225 break;
1226 case Instruction::GetElementPtr: {
1227 const auto *GEP = cast<GEPOperator>(U);
1228 Type *AccessType = nullptr;
1229 // For now, only provide the AccessType in the simple case where the GEP
1230 // only has one user.
1231 if (GEP->hasOneUser() && I)
1232 AccessType = I->user_back()->getAccessType();
1233
1234 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1235 Operands.front(), Operands.drop_front(),
1236 AccessType, CostKind);
1237 }
1238 case Instruction::Add:
1239 case Instruction::FAdd:
1240 case Instruction::Sub:
1241 case Instruction::FSub:
1242 case Instruction::Mul:
1243 case Instruction::FMul:
1244 case Instruction::UDiv:
1245 case Instruction::SDiv:
1246 case Instruction::FDiv:
1247 case Instruction::URem:
1248 case Instruction::SRem:
1249 case Instruction::FRem:
1250 case Instruction::Shl:
1251 case Instruction::LShr:
1252 case Instruction::AShr:
1253 case Instruction::And:
1254 case Instruction::Or:
1255 case Instruction::Xor:
1256 case Instruction::FNeg: {
1257 TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
1258 TTI::OperandValueInfo Op2Info;
1259 if (Opcode != Instruction::FNeg)
1260 Op2Info = TTI::getOperandInfo(Operands[1]);
1261 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1262 Op2Info, Operands, I);
1263 }
1264 case Instruction::IntToPtr:
1265 case Instruction::PtrToInt:
1266 case Instruction::SIToFP:
1267 case Instruction::UIToFP:
1268 case Instruction::FPToUI:
1269 case Instruction::FPToSI:
1270 case Instruction::Trunc:
1271 case Instruction::FPTrunc:
1272 case Instruction::BitCast:
1273 case Instruction::FPExt:
1274 case Instruction::SExt:
1275 case Instruction::ZExt:
1276 case Instruction::AddrSpaceCast: {
1277 Type *OpTy = Operands[0]->getType();
1278 return TargetTTI->getCastInstrCost(
1279 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1280 }
1281 case Instruction::Store: {
1282 auto *SI = cast<StoreInst>(U);
1283 Type *ValTy = Operands[0]->getType();
1284 TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
1285 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1286 SI->getPointerAddressSpace(), CostKind,
1287 OpInfo, I);
1288 }
1289 case Instruction::Load: {
1290 // FIXME: Arbitrary cost which could come from the backend.
1291 if (CostKind == TTI::TCK_Latency)
1292 return 4;
1293 auto *LI = cast<LoadInst>(U);
1294 Type *LoadType = U->getType();
1295 // If there is a non-register sized type, the cost estimation may expand
1296 // it to be several instructions to load into multiple registers on the
1297 // target. But, if the only use of the load is a trunc instruction to a
1298 // register sized type, the instruction selector can combine these
1299 // instructions to be a single load. So, in this case, we use the
1300 // destination type of the trunc instruction rather than the load to
1301 // accurately estimate the cost of this load instruction.
1302 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1303 !LoadType->isVectorTy()) {
1304 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1305 LoadType = TI->getDestTy();
1306 }
1307 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1308 LI->getPointerAddressSpace(), CostKind,
1309 {TTI::OK_AnyValue, TTI::OP_None}, I);
1310 }
1311 case Instruction::Select: {
1312 const Value *Op0, *Op1;
1313 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1314 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1315 // select x, y, false --> x & y
1316 // select x, true, y --> x | y
1317 const auto Op1Info = TTI::getOperandInfo(Op0);
1318 const auto Op2Info = TTI::getOperandInfo(Op1);
1319 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1320 Op1->getType()->getScalarSizeInBits() == 1);
1321
1322 SmallVector<const Value *, 2> Operands{Op0, Op1};
1323 return TargetTTI->getArithmeticInstrCost(
1324 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1325 CostKind, Op1Info, Op2Info, Operands, I);
1326 }
1327 Type *CondTy = Operands[0]->getType();
1328 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1329 CmpInst::BAD_ICMP_PREDICATE,
1330 CostKind, I);
1331 }
1332 case Instruction::ICmp:
1333 case Instruction::FCmp: {
1334 Type *ValTy = Operands[0]->getType();
1335 // TODO: Also handle ICmp/FCmp constant expressions.
1336 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1337 I ? cast<CmpInst>(I)->getPredicate()
1338 : CmpInst::BAD_ICMP_PREDICATE,
1339 CostKind, I);
1340 }
1341 case Instruction::InsertElement: {
1342 auto *IE = dyn_cast<InsertElementInst>(U);
1343 if (!IE)
1344 return TTI::TCC_Basic; // FIXME
1345 unsigned Idx = -1;
1346 if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
1347 if (CI->getValue().getActiveBits() <= 32)
1348 Idx = CI->getZExtValue();
1349 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx);
1350 }
1351 case Instruction::ShuffleVector: {
1352 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1353 if (!Shuffle)
1354 return TTI::TCC_Basic; // FIXME
1355
1356 auto *VecTy = cast<VectorType>(U->getType());
1357 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
1358 ArrayRef<int> Mask = Shuffle->getShuffleMask();
1359 int NumSubElts, SubIndex;
1360
1361 // TODO: move more of this inside improveShuffleKindFromMask.
1362 if (Shuffle->changesLength()) {
1363 // Treat a 'subvector widening' as a free shuffle.
1364 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1365 return 0;
1366
1367 if (Shuffle->isExtractSubvectorMask(SubIndex))
1368 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy,
1369 Mask, CostKind, SubIndex, VecTy,
1370 Operands, Shuffle);
1371
1372 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1373 return TargetTTI->getShuffleCost(
1374 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1375 FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1376 Operands, Shuffle);
1377
1378 int ReplicationFactor, VF;
1379 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1380 APInt DemandedDstElts = APInt::getZero(Mask.size());
1381 for (auto I : enumerate(Mask)) {
1382 if (I.value() != PoisonMaskElem)
1383 DemandedDstElts.setBit(I.index());
1384 }
1385 return TargetTTI->getReplicationShuffleCost(
1386 VecSrcTy->getElementType(), ReplicationFactor, VF,
1387 DemandedDstElts, CostKind);
1388 }
1389
1390 bool IsUnary = isa<UndefValue>(Operands[1]);
1391 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1392 SmallVector<int, 16> AdjustMask(Mask.begin(), Mask.end());
1393
1394 // Widening shuffle - widening the source(s) to the new length
1395 // (treated as free - see above), and then perform the adjusted
1396 // shuffle at that width.
1397 if (Shuffle->increasesLength()) {
1398 for (int &M : AdjustMask)
1399 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1400
1401 return TargetTTI->getShuffleCost(
1402 IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc, VecTy,
1403 AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1404 }
1405
1406 // Narrowing shuffle - perform shuffle at original wider width and
1407 // then extract the lower elements.
1408 AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
1409
1410 InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
1411 IsUnary ? TTI::SK_PermuteSingleSrc : TTI::SK_PermuteTwoSrc,
1412 VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1413
1414 SmallVector<int, 16> ExtractMask(Mask.size());
1415 std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1416 return ShuffleCost + TargetTTI->getShuffleCost(
1417 TTI::SK_ExtractSubvector, VecSrcTy,
1418 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
1419 }
1420
1421 if (Shuffle->isIdentity())
1422 return 0;
1423
1424 if (Shuffle->isReverse())
1425 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, Mask, CostKind,
1426 0, nullptr, Operands, Shuffle);
1427
1428 if (Shuffle->isSelect())
1429 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, Mask, CostKind,
1430 0, nullptr, Operands, Shuffle);
1431
1432 if (Shuffle->isTranspose())
1433 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, Mask,
1434 CostKind, 0, nullptr, Operands,
1435 Shuffle);
1436
1437 if (Shuffle->isZeroEltSplat())
1438 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, Mask,
1439 CostKind, 0, nullptr, Operands,
1440 Shuffle);
1441
1442 if (Shuffle->isSingleSource())
1443 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, Mask,
1444 CostKind, 0, nullptr, Operands,
1445 Shuffle);
1446
1447 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1448 return TargetTTI->getShuffleCost(
1449 TTI::SK_InsertSubvector, VecTy, Mask, CostKind, SubIndex,
1450 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
1451 Shuffle);
1452
1453 if (Shuffle->isSplice(SubIndex))
1454 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, Mask, CostKind,
1455 SubIndex, nullptr, Operands, Shuffle);
1456
1457 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, Mask,
1458 CostKind, 0, nullptr, Operands, Shuffle);
1459 }
1460 case Instruction::ExtractElement: {
1461 auto *EEI = dyn_cast<ExtractElementInst>(U);
1462 if (!EEI)
1463 return TTI::TCC_Basic; // FIXME
1464 unsigned Idx = -1;
1465 if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
1466 if (CI->getValue().getActiveBits() <= 32)
1467 Idx = CI->getZExtValue();
1468 Type *DstTy = Operands[0]->getType();
1469 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1470 }
1471 }
1472
1473 // By default, just classify everything as 'basic' or -1 to represent that
1474 // we don't know the throughput cost.
1475 return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic;
1476 }
1477
1478 bool isExpensiveToSpeculativelyExecute(const Instruction *I) {
1479 auto *TargetTTI = static_cast<T *>(this);
1480 SmallVector<const Value *, 4> Ops(I->operand_values());
1481 InstructionCost Cost = TargetTTI->getInstructionCost(
1482 I, Ops, TargetTransformInfo::TCK_SizeAndLatency);
1483 return Cost >= TargetTransformInfo::TCC_Expensive;
1484 }
1485
1486 bool supportsTailCallFor(const CallBase *CB) const {
1487 return static_cast<const T *>(this)->supportsTailCalls();
1488 }
1489};
1490} // namespace llvm
1491
1492#endif // LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
static bool isSigned(unsigned int Opcode)
Hexagon Common GEP
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
mir Rename Register Operands
uint64_t IntrinsicInst * II
static cl::opt< RegAllocEvictionAdvisorAnalysis::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysis::AdvisorMode::Development, "development", "for training")))
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition: APInt.h:78
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1310
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1448
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:1010
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:180
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1522
an instruction to allocate memory on the stack
Definition: Instructions.h:61
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLegalInteger(uint64_t Width) const
Returns true if the specified type is known to be a native integer type supported by the CPU.
Definition: DataLayout.h:260
const StructLayout * getStructLayout(StructType *Ty) const
Returns a StructLayout object, indicating the alignment of the struct, its size, and the offsets of i...
Definition: DataLayout.cpp:720
unsigned getPointerTypeSizeInBits(Type *) const
Layout pointer size, in bits, based on the type.
Definition: DataLayout.cpp:763
TypeSize getTypeSizeInBits(Type *Ty) const
Size examples:
Definition: DataLayout.h:672
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Definition: DataLayout.h:472
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:317
Convenience struct for specifying and reasoning about fast-math flags.
Definition: FMF.h:20
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
The core instruction combiner logic.
Definition: InstCombiner.h:47
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition: Operator.h:42
The optimization diagnostic interface.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:71
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
An instruction for storing to memory.
Definition: Instructions.h:290
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:651
Class to represent struct types.
Definition: DerivedTypes.h:216
Multiway switch.
Provides information about what library functions are available for the current target.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
const DataLayout & getDataLayout() const
void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicCpySize) const
bool isLegalToVectorizeStore(StoreInst *SI) const
bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
bool shouldTreatInstructionLikeSelect(const Instruction *I)
bool isLegalToVectorizeLoad(LoadInst *LI) const
bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
std::optional< unsigned > getVScaleForTuning() const
bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
bool isLegalICmpImmediate(int64_t Imm) const
bool hasConditionalLoadStoreForType(Type *Ty) const
unsigned getRegUsageForType(Type *Ty) const
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
bool areTypesABICompatible(const Function *Caller, const Function *Callee, const ArrayRef< Type * > &Types) const
void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
bool isAlwaysUniform(const Value *V) const
bool isProfitableToHoist(Instruction *I) const
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
bool isTruncateFree(Type *Ty1, Type *Ty2) const
bool isStridedAccess(const SCEV *Ptr) const
InstructionCost getBranchMispredictPenalty() const
InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
InstructionCost getFPOpCost(Type *Ty) const
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
bool areInlineCompatible(const Function *Caller, const Function *Callee) const
std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
InstructionCost getMemcpyCost(const Instruction *I) const
unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
std::optional< unsigned > getMaxVScale() const
TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
bool isProfitableLSRChainElement(Instruction *I) const
InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr) const
InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType) const
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, const Instruction *I) const
InstructionCost getStridedMemoryOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
bool isNoopAddrSpaceCast(unsigned, unsigned) const
unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const
InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign, std::optional< uint32_t > AtomicElementSize) const
std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
unsigned getAssumedAddrSpace(const Value *V) const
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
bool isLegalNTStore(Type *DataType, Align Alignment) const
unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
unsigned adjustInliningThreshold(const CallBase *CB) const
BranchProbability getPredictableBranchThreshold() const
std::optional< unsigned > getMinPageSize() const
bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, FastMathFlags FMF, TTI::TargetCostKind CostKind) const
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool shouldPrefetchAddressSpace(unsigned AS) const
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind)
bool isSourceOfDivergence(const Value *V) const
bool enableAggressiveInterleaving(bool LoopHasReductions) const
unsigned getMaxInterleaveFactor(ElementCount VF) const
TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const
bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, const DataLayout &DL) const
bool hasDivRemOp(Type *DataType, bool IsSigned) const
InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, const SCEV *) const
InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, const Instruction *I) const
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
bool preferInLoopReduction(unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const
InstructionCost getOperandsScalarizationOverhead(ArrayRef< const Value * > Args, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind) const
bool isLoweredToCall(const Function *F) const
bool hasBranchDivergence(const Function *F=nullptr) const
TargetTransformInfoImplBase(const DataLayout &DL)
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
const char * getRegisterClassName(unsigned ClassID) const
bool isElementTypeLegalForScalableVector(Type *Ty) const
bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind) const
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Op0, Value *Op1) const
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const
bool useColdCCForColdCall(Function &F) const
bool shouldExpandReduction(const IntrinsicInst *II) const
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
unsigned getNumberOfRegisters(unsigned ClassID) const
InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index) const
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const
bool isLegalNTLoad(Type *DataType, Align Alignment) const
std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
bool hasActiveVectorLength(unsigned Opcode, Type *DataType, Align Alignment) const
bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
bool isLegalAddScalableImmediate(int64_t Imm) const
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
bool shouldBuildLookupTablesForConstant(Constant *C) const
Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class.
bool supportsTailCallFor(const CallBase *CB) const
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind)
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind)
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind)
bool isExpensiveToSpeculativelyExecute(const Instruction *I)
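The mix-in above is consumed by deriving from the CRTP base and shadowing only the hooks a target cares about; every query that is not overridden falls back to the conservative defaults listed in this file. A minimal sketch, assuming a hypothetical target named Example (in-tree backends normally layer BasicTTIImplBase on top of this instead):

#include "llvm/Analysis/TargetTransformInfoImpl.h"

namespace llvm {

// Hypothetical, illustration only: reuse every conservative default and
// override a single hook.
class ExampleTTIImpl final
    : public TargetTransformInfoImplCRTPBase<ExampleTTIImpl> {
  using BaseT = TargetTransformInfoImplCRTPBase<ExampleTTIImpl>;

public:
  explicit ExampleTTIImpl(const DataLayout &DL) : BaseT(DL) {}

  // Shadows the default (which returns 1); picked up when this implementation
  // is wrapped in a TargetTransformInfo object.
  unsigned getInliningThresholdMultiplier() const { return 2; }
};

} // namespace llvm

TargetTransformInfo's templated constructor then type-erases such an implementation, so passes only ever see the uniform TTI interface.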
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
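Passes reach it through TargetIRAnalysis; a minimal sketch of a hypothetical new-pass-manager function pass that only queries TTI:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

// Hypothetical pass, illustration only.
struct ExampleTTIQueryPass : PassInfoMixin<ExampleTTIQueryPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM) {
    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
    bool Divergent = TTI.hasBranchDivergence(&F);
    (void)Divergent; // e.g. steer speculation or if-conversion decisions
    return PreservedAnalyses::all();
  }
};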
static CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
static OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of an instruction.
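The cost kind selects which axis of the model a query answers, so the same instruction can be cheap for one kind and expensive for another. A sketch of choosing a kind when calling the public TargetTransformInfo interface (the helper name is illustrative only):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Illustration only: prefer the throughput-oriented cost when vectorizing and
// the code-size cost when optimizing for size.
static InstructionCost pickCost(const TargetTransformInfo &TTI,
                                const Instruction &I, bool OptForSize) {
  TargetTransformInfo::TargetCostKind Kind =
      OptForSize ? TargetTransformInfo::TCK_CodeSize
                 : TargetTransformInfo::TCK_RecipThroughput;
  return TTI.getInstructionCost(&I, Kind);
}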
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
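These constants are the units the default model works in (roughly: folds away, one 'add', one 'div'), and client code usually compares query results against them. A hedged sketch:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Illustration only: treat an instruction as hoistable/speculatable if the
// size-and-latency model says it folds away or costs no more than an 'add'.
static bool isEssentiallyFree(const TargetTransformInfo &TTI,
                              const Instruction &I) {
  InstructionCost C =
      TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
  return C.isValid() && C <= TargetTransformInfo::TCC_Basic;
}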
MemIndexedMode
The type of load/store indexing.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of a single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
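Each kind names a mask shape that targets can usually lower more cheaply than an arbitrary two-source permutation. A hedged sketch (illustration only; ShuffleVectorInst already provides mask predicates for real classification) of how a few fixed-width masks map onto the kinds:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustration only: classify a mask of NumElts elements drawn from two
// NumElts-wide sources into a subset of the TTI::ShuffleKind buckets.
static TargetTransformInfo::ShuffleKind classifyMask(ArrayRef<int> Mask,
                                                     int NumElts) {
  // <0, 0, ..., 0>: element 0 copied into every lane.
  if (all_of(Mask, [](int M) { return M == 0; }))
    return TargetTransformInfo::SK_Broadcast;
  // <NumElts-1, ..., 1, 0>: the first source with its lanes reversed.
  bool IsReverse = Mask.size() == (size_t)NumElts;
  for (int I = 0; IsReverse && I != NumElts; ++I)
    IsReverse = Mask[I] == NumElts - 1 - I;
  if (IsReverse)
    return TargetTransformInfo::SK_Reverse;
  // Every index selects from the first source: single-source permutation.
  if (all_of(Mask, [&](int M) { return M < NumElts; }))
    return TargetTransformInfo::SK_PermuteSingleSrc;
  // Otherwise lanes are drawn from both sources.
  return TargetTransformInfo::SK_PermuteTwoSrc;
}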
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
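Prefetching and tiling heuristics query the per-level parameters through getCacheSize/getCacheAssociativity; the defaults above simply report nothing. A small hedged sketch (the 32 KiB fallback is made up for the example):

#include "llvm/Analysis/TargetTransformInfo.h"
#include <optional>

using namespace llvm;

// Illustration only: L1 data cache size, with an arbitrary fallback when the
// target does not describe its cache hierarchy.
static unsigned l1DataCacheSize(const TargetTransformInfo &TTI) {
  std::optional<unsigned> Size =
      TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D);
  return Size.value_or(32 * 1024); // hypothetical default
}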
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static IntegerType * getInt8Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
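Because register widths may be scalable, TypeSize results have to be checked before being treated as plain integers; a hedged sketch using the TTI register-width query:

#include "llvm/Analysis/TargetTransformInfo.h"

using namespace llvm;

// Illustration only: the fixed vector register width in bits, or 0 if the
// target only reports a scalable (vscale-scaled) width.
static unsigned fixedVectorRegisterBits(const TargetTransformInfo &TTI) {
  TypeSize Width =
      TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
  return Width.isScalable() ? 0 : (unsigned)Width.getFixedValue();
}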
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
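These matchers are the idiom the cost helpers in this file use to recognize select-based logical operations; a short hedged sketch:

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Illustration only: true if V computes L && R or L || R, either as a plain
// 'and'/'or' or in the select-based short-circuit form.
static bool isLogicalAndOr(Value *V) {
  Value *L, *R;
  return match(V, m_CombineOr(m_LogicalAnd(m_Value(L), m_Value(R)),
                              m_LogicalOr(m_Value(L), m_Value(R))));
}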
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Length
Definition: DWP.cpp:480
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2400
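A typical use is walking a value's operands while keeping the 0-based operand index, e.g. for per-operand cost queries; a hedged sketch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Illustration only: pair each operand of I with its operand number.
static void visitOperands(const Instruction &I) {
  for (const auto &En : enumerate(I.operands())) {
    unsigned Idx = En.index();  // 0-based operand number
    const Use &U = En.value();  // the operand itself
    (void)Idx;
    (void)U;
  }
}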
AddressSpace
Definition: NVPTXBaseInfo.h:21
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
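Together these helpers cover the operand-property checks the cost model leans on; a hedged sketch combining them:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Illustration only: true if any operand of I is a constant power of two that
// fits in 32 bits (e.g. a multiply that is really a shift).
static bool hasPow2ConstantOperand(const Instruction &I) {
  return any_of(I.operands(), [](const Use &U) {
    const auto *CI = dyn_cast<ConstantInt>(U.get());
    return CI && CI->getValue().getActiveBits() <= 32 &&
           isPowerOf2_32((uint32_t)CI->getZExtValue());
  });
}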
constexpr int PoisonMaskElem
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
gep_type_iterator gep_type_begin(const User *GEP)
InstructionCost Cost
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead i...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Attributes of a target dependent hardware loop.
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Flags describing the kind of vector reduction.
Parameters that control the generic loop unrolling transformation.