TargetTransformInfoImpl.h
1//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This file provides helpers for the implementation of
10/// a TargetTransformInfo-conforming class.
11///
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
15#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
16
21#include "llvm/IR/DataLayout.h"
24#include "llvm/IR/Operator.h"
26#include <optional>
27#include <utility>
28
29namespace llvm {
30
31class Function;
32
33/// Base class for use as a mix-in that aids implementing
34/// a TargetTransformInfo-compatible class.
35class TargetTransformInfoImplBase {
36
37protected:
39
40 const DataLayout &DL;
41
43
44public:
46
47 // Provide value semantics. MSVC requires that we spell all of these out.
50
51 virtual const DataLayout &getDataLayout() const { return DL; }
52
53 // FIXME: It looks like this implementation is dead. All clients appear to
54 // use the (non-const) version from `TargetTransformInfoImplCRTPBase`.
55 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
57 Type *AccessType,
59 // In the basic model, we just assume that all-constant GEPs will be folded
60 // into their uses via addressing modes.
61 for (const Value *Operand : Operands)
62 if (!isa<Constant>(Operand))
63 return TTI::TCC_Basic;
64
65 return TTI::TCC_Free;
66 }
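  // Illustrative examples for this default model (not part of the interface):
  //   %p = getelementptr i32, ptr %base, i64 4     ; all-constant -> TCC_Free
  //   %q = getelementptr i32, ptr %base, i64 %idx  ; variable index -> TCC_Basic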
67
68 virtual InstructionCost
70 const TTI::PointersChainInfo &Info, Type *AccessTy,
72 llvm_unreachable("Not implemented");
73 }
74
75 virtual unsigned
78 BlockFrequencyInfo *BFI) const {
79 (void)PSI;
80 (void)BFI;
81 JTSize = 0;
82 return SI.getNumCases();
83 }
84
85 virtual InstructionCost
88 llvm_unreachable("Not implemented");
89 }
90
91 virtual unsigned getInliningThresholdMultiplier() const { return 1; }
93 return 8;
94 }
96 return 8;
97 }
99 // This is the value of InlineConstants::LastCallToStaticBonus before it was
100 // removed along with the introduction of this function.
101 return 15000;
102 }
103 virtual unsigned adjustInliningThreshold(const CallBase *CB) const {
104 return 0;
105 }
106 virtual unsigned getCallerAllocaCost(const CallBase *CB,
107 const AllocaInst *AI) const {
108 return 0;
109 };
110
111 virtual int getInlinerVectorBonusPercent() const { return 150; }
112
114 return TTI::TCC_Expensive;
115 }
116
117 virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { return 64; }
118
119 // Although this default value is arbitrary, it is not random. It is assumed
120 // that a condition which evaluates the same way more often than this
121 // threshold is best represented as control flow. Therefore, the default
122 // value N should be set such that the win from N% correctly predicted
123 // executions is greater than the loss from (100 - N)% mispredicted
124 // executions for the majority of intended targets.
126 return BranchProbability(99, 100);
127 }
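  // Worked example (illustrative): with the default 99/100 threshold, a branch
  // is treated as predictable only when its likelier direction is taken at
  // least 99% of the time, i.e. the modelled win of 99 correct predictions per
  // 100 executions must exceed the cost of the single expected misprediction.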
128
129 virtual InstructionCost getBranchMispredictPenalty() const { return 0; }
130
131 virtual bool hasBranchDivergence(const Function *F = nullptr) const {
132 return false;
133 }
134
138
139 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
140 return false;
141 }
142
143 virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const {
144 return true;
145 }
146
147 virtual unsigned getFlatAddressSpace() const { return -1; }
148
150 Intrinsic::ID IID) const {
151 return false;
152 }
153
154 virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
155
156 virtual std::pair<KnownBits, KnownBits>
157 computeKnownBitsAddrSpaceCast(unsigned ToAS, const Value &PtrOp) const {
158 const Type *PtrTy = PtrOp.getType();
159 assert(PtrTy->isPtrOrPtrVectorTy() &&
160 "expected pointer or pointer vector type");
161 unsigned FromAS = PtrTy->getPointerAddressSpace();
162
163 if (DL.isNonIntegralAddressSpace(FromAS))
164 return std::pair(KnownBits(DL.getPointerSizeInBits(FromAS)),
165 KnownBits(DL.getPointerSizeInBits(ToAS)));
166
167 KnownBits FromPtrBits;
168 if (const AddrSpaceCastInst *CastI = dyn_cast<AddrSpaceCastInst>(&PtrOp)) {
169 std::pair<KnownBits, KnownBits> KB = computeKnownBitsAddrSpaceCast(
170 CastI->getDestAddressSpace(), *CastI->getPointerOperand());
171 FromPtrBits = KB.second;
172 } else {
173 FromPtrBits = computeKnownBits(&PtrOp, DL, nullptr);
174 }
175
176 KnownBits ToPtrBits =
177 computeKnownBitsAddrSpaceCast(FromAS, ToAS, FromPtrBits);
178
179 return {FromPtrBits, ToPtrBits};
180 }
181
182 virtual KnownBits
183 computeKnownBitsAddrSpaceCast(unsigned FromAS, unsigned ToAS,
184 const KnownBits &FromPtrBits) const {
185 unsigned ToASBitSize = DL.getPointerSizeInBits(ToAS);
186
187 if (DL.isNonIntegralAddressSpace(FromAS))
188 return KnownBits(ToASBitSize);
189
190 // By default, we assume that all valid "larger" (e.g. 64-bit) to "smaller"
191 // (e.g. 32-bit) casts work by chopping off the high bits.
192 // By default, we do not assume that null results in null again.
193 return FromPtrBits.anyextOrTrunc(ToASBitSize);
194 }
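  // Minimal sketch of the default behaviour (assuming a 64-bit integral source
  // address space and a 32-bit destination address space):
  //   KnownBits From(64);
  //   From.Zero.setLowBits(3);               // pointer known 8-byte aligned
  //   KnownBits To = From.anyextOrTrunc(32); // what this default returns
  //   // To.countMinTrailingZeros() == 3; all remaining bits stay unknown.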
195
196 virtual bool
198 return AS == 0;
199 };
200
201 virtual unsigned getAssumedAddrSpace(const Value *V) const { return -1; }
202
203 virtual bool isSingleThreaded() const { return false; }
204
205 virtual std::pair<const Value *, unsigned>
207 return std::make_pair(nullptr, -1);
208 }
209
211 Value *OldV,
212 Value *NewV) const {
213 return nullptr;
214 }
215
216 virtual bool isLoweredToCall(const Function *F) const {
217 assert(F && "A concrete function must be provided to this routine.");
218
219 // FIXME: These should almost certainly not be handled here, and instead
220 // handled with the help of TLI or the target itself. This was largely
221 // ported from existing analysis heuristics here so that such refactorings
222 // can take place in the future.
223
224 if (F->isIntrinsic())
225 return false;
226
227 if (F->hasLocalLinkage() || !F->hasName())
228 return true;
229
230 StringRef Name = F->getName();
231
232 // These will all likely lower to a single selection DAG node.
233 // clang-format off
234 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
235 Name == "fabs" || Name == "fabsf" || Name == "fabsl" ||
236 Name == "fmin" || Name == "fminf" || Name == "fminl" ||
237 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
238 Name == "sin" || Name == "sinf" || Name == "sinl" ||
239 Name == "cos" || Name == "cosf" || Name == "cosl" ||
240 Name == "tan" || Name == "tanf" || Name == "tanl" ||
241 Name == "asin" || Name == "asinf" || Name == "asinl" ||
242 Name == "acos" || Name == "acosf" || Name == "acosl" ||
243 Name == "atan" || Name == "atanf" || Name == "atanl" ||
244 Name == "atan2" || Name == "atan2f" || Name == "atan2l"||
245 Name == "sinh" || Name == "sinhf" || Name == "sinhl" ||
246 Name == "cosh" || Name == "coshf" || Name == "coshl" ||
247 Name == "tanh" || Name == "tanhf" || Name == "tanhl" ||
248 Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ||
249 Name == "exp10" || Name == "exp10l" || Name == "exp10f")
250 return false;
251 // clang-format on
252 // These are all likely to be optimized into something smaller.
253 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
254 Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
255 Name == "floorf" || Name == "ceil" || Name == "round" ||
256 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
257 Name == "llabs")
258 return false;
259
260 return true;
261 }
262
264 AssumptionCache &AC,
265 TargetLibraryInfo *LibInfo,
266 HardwareLoopInfo &HWLoopInfo) const {
267 return false;
268 }
269
270 virtual unsigned getEpilogueVectorizationMinVF() const { return 16; }
271
273 return false;
274 }
275
276 virtual TailFoldingStyle
277 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const {
279 }
280
281 virtual std::optional<Instruction *>
283 return std::nullopt;
284 }
285
286 virtual std::optional<Value *>
288 APInt DemandedMask, KnownBits &Known,
289 bool &KnownBitsComputed) const {
290 return std::nullopt;
291 }
292
293 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
294 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
295 APInt &UndefElts2, APInt &UndefElts3,
296 std::function<void(Instruction *, unsigned, APInt, APInt &)>
297 SimplifyAndSetOp) const {
298 return std::nullopt;
299 }
300
304
307
308 virtual bool isLegalAddImmediate(int64_t Imm) const { return false; }
309
310 virtual bool isLegalAddScalableImmediate(int64_t Imm) const { return false; }
311
312 virtual bool isLegalICmpImmediate(int64_t Imm) const { return false; }
313
314 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
315 int64_t BaseOffset, bool HasBaseReg,
316 int64_t Scale, unsigned AddrSpace,
317 Instruction *I = nullptr,
318 int64_t ScalableOffset = 0) const {
319 // Guess that only reg and reg+reg addressing is allowed. This heuristic is
320 // taken from the implementation of LSR.
321 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
322 }
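  // Illustrative sketch: with this default only "reg" and "reg + reg"
  // addressing is accepted, e.g. (parameter names as above):
  //   BaseGV = nullptr, BaseOffset = 0, Scale = 1  -> true  (reg + reg)
  //   BaseGV = nullptr, BaseOffset = 0, Scale = 0  -> true  (reg)
  //   BaseGV = nullptr, BaseOffset = 8, Scale = 0  -> false (reg + imm)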
323
324 virtual bool isLSRCostLess(const TTI::LSRCost &C1,
325 const TTI::LSRCost &C2) const {
326 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
327 C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
328 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
329 C2.ScaleCost, C2.ImmCost, C2.SetupCost);
330 }
331
332 virtual bool isNumRegsMajorCostOfLSR() const { return true; }
333
334 virtual bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
335
337 return false;
338 }
339
340 virtual bool canMacroFuseCmp() const { return false; }
341
342 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
344 TargetLibraryInfo *LibInfo) const {
345 return false;
346 }
347
350 return TTI::AMK_None;
351 }
352
353 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment,
354 unsigned AddressSpace,
355 TTI::MaskKind MaskKind) const {
356 return false;
357 }
358
359 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment,
360 unsigned AddressSpace,
361 TTI::MaskKind MaskKind) const {
362 return false;
363 }
364
365 virtual bool isLegalNTStore(Type *DataType, Align Alignment) const {
366 // By default, assume nontemporal memory stores are available for stores
367 // that are aligned and have a size that is a power of 2.
368 unsigned DataSize = DL.getTypeStoreSize(DataType);
369 return Alignment >= DataSize && isPowerOf2_32(DataSize);
370 }
371
372 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const {
373 // By default, assume nontemporal memory loads are available for loads that
374 // are aligned and have a size that is a power of 2.
375 unsigned DataSize = DL.getTypeStoreSize(DataType);
376 return Alignment >= DataSize && isPowerOf2_32(DataSize);
377 }
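  // For example, under these defaults a nontemporal access of <4 x i32>
  // (16 bytes) is legal at Align(16) but not at Align(8), since the alignment
  // must be at least the store size and the size a power of two:
  //   isLegalNTStore(<4 x i32>, Align(16)) -> true
  //   isLegalNTStore(<4 x i32>, Align(8))  -> false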
378
379 virtual bool isLegalBroadcastLoad(Type *ElementTy,
380 ElementCount NumElements) const {
381 return false;
382 }
383
384 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const {
385 return false;
386 }
387
388 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
389 return false;
390 }
391
393 Align Alignment) const {
394 return false;
395 }
396
398 Align Alignment) const {
399 return false;
400 }
401
402 virtual bool isLegalMaskedCompressStore(Type *DataType,
403 Align Alignment) const {
404 return false;
405 }
406
407 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
408 unsigned Opcode1,
409 const SmallBitVector &OpcodeMask) const {
410 return false;
411 }
412
413 virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const {
414 return false;
415 }
416
417 virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const {
418 return false;
419 }
420
421 virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor,
422 Align Alignment,
423 unsigned AddrSpace) const {
424 return false;
425 }
426
427 virtual bool isLegalMaskedVectorHistogram(Type *AddrType,
428 Type *DataType) const {
429 return false;
430 }
431
432 virtual bool enableOrderedReductions() const { return false; }
433
434 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const {
435 return false;
436 }
437
438 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const {
439 return false;
440 }
441
442 virtual bool prefersVectorizedAddressing() const { return true; }
443
445 StackOffset BaseOffset,
446 bool HasBaseReg, int64_t Scale,
447 unsigned AddrSpace) const {
448 // Guess that all legal addressing modes are free.
449 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset.getFixed(), HasBaseReg,
450 Scale, AddrSpace, /*I=*/nullptr,
451 BaseOffset.getScalable()))
452 return 0;
454 }
455
456 virtual bool LSRWithInstrQueries() const { return false; }
457
458 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; }
459
460 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
461
462 virtual bool useAA() const { return false; }
463
464 virtual bool isTypeLegal(Type *Ty) const { return false; }
465
466 virtual unsigned getRegUsageForType(Type *Ty) const { return 1; }
467
468 virtual bool shouldBuildLookupTables() const { return true; }
469
471 return true;
472 }
473
474 virtual bool shouldBuildRelLookupTables() const { return false; }
475
476 virtual bool useColdCCForColdCall(Function &F) const { return false; }
477
478 virtual bool useFastCCForInternalCall(Function &F) const { return true; }
479
481 return false;
482 }
483
485 unsigned ScalarOpdIdx) const {
486 return false;
487 }
488
490 int OpdIdx) const {
491 return OpdIdx == -1;
492 }
493
494 virtual bool
496 int RetIdx) const {
497 return RetIdx == 0;
498 }
499
501 VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
502 TTI::TargetCostKind CostKind, bool ForPoisonSrc = true,
503 ArrayRef<Value *> VL = {},
505 // Default implementation returns 0.
506 // BasicTTIImpl provides the actual implementation.
507 return 0;
508 }
509
515
516 virtual bool supportsEfficientVectorElementLoadStore() const { return false; }
517
518 virtual bool supportsTailCalls() const { return true; }
519
520 virtual bool supportsTailCallFor(const CallBase *CB) const {
521 llvm_unreachable("Not implemented");
522 }
523
524 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const {
525 return false;
526 }
527
529 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
530 return {};
531 }
532
533 virtual bool enableSelectOptimize() const { return true; }
534
535 virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const {
536 // A select with two constant operands will usually be better left as a
537 // select.
538 using namespace llvm::PatternMatch;
540 return false;
541 // If the select is a logical-and/logical-or then it is better treated as a
542 // and/or by the backend.
543 return isa<SelectInst>(I) &&
546 }
547
548 virtual bool enableInterleavedAccessVectorization() const { return false; }
549
551 return false;
552 }
553
554 virtual bool isFPVectorizationPotentiallyUnsafe() const { return false; }
555
557 unsigned BitWidth,
558 unsigned AddressSpace,
559 Align Alignment,
560 unsigned *Fast) const {
561 return false;
562 }
563
565 getPopcntSupport(unsigned IntTyWidthInBit) const {
566 return TTI::PSK_Software;
567 }
568
569 virtual bool haveFastSqrt(Type *Ty) const { return false; }
570
572 return true;
573 }
574
575 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; }
576
577 virtual InstructionCost getFPOpCost(Type *Ty) const {
579 }
580
581 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
582 const APInt &Imm,
583 Type *Ty) const {
584 return 0;
585 }
586
587 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
589 return TTI::TCC_Basic;
590 }
591
592 virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx,
593 const APInt &Imm, Type *Ty,
595 Instruction *Inst = nullptr) const {
596 return TTI::TCC_Free;
597 }
598
599 virtual InstructionCost
600 getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
601 Type *Ty, TTI::TargetCostKind CostKind) const {
602 return TTI::TCC_Free;
603 }
604
606 const Function &Fn) const {
607 return false;
608 }
609
610 virtual unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
611 virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const {
612 return false;
613 }
614
615 virtual unsigned getRegisterClassForType(bool Vector,
616 Type *Ty = nullptr) const {
617 return Vector ? 1 : 0;
618 }
619
620 virtual const char *getRegisterClassName(unsigned ClassID) const {
621 switch (ClassID) {
622 default:
623 return "Generic::Unknown Register Class";
624 case 0:
625 return "Generic::ScalarRC";
626 case 1:
627 return "Generic::VectorRC";
628 }
629 }
630
631 virtual TypeSize
635
636 virtual unsigned getMinVectorRegisterBitWidth() const { return 128; }
637
638 virtual std::optional<unsigned> getMaxVScale() const { return std::nullopt; }
639 virtual std::optional<unsigned> getVScaleForTuning() const {
640 return std::nullopt;
641 }
642 virtual bool isVScaleKnownToBeAPowerOfTwo() const { return false; }
643
644 virtual bool
648
649 virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const {
650 return ElementCount::get(0, IsScalable);
651 }
652
653 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const {
654 return 0;
655 }
656 virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const {
657 return VF;
658 }
659
661 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
662 AllowPromotionWithoutCommonHeader = false;
663 return false;
664 }
665
666 virtual unsigned getCacheLineSize() const { return 0; }
667 virtual std::optional<unsigned>
669 switch (Level) {
671 [[fallthrough]];
673 return std::nullopt;
674 }
675 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
676 }
677
678 virtual std::optional<unsigned>
680 switch (Level) {
682 [[fallthrough]];
684 return std::nullopt;
685 }
686
687 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
688 }
689
690 virtual std::optional<unsigned> getMinPageSize() const { return {}; }
691
692 virtual unsigned getPrefetchDistance() const { return 0; }
693 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
694 unsigned NumStridedMemAccesses,
695 unsigned NumPrefetches,
696 bool HasCall) const {
697 return 1;
698 }
699 virtual unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; }
700 virtual bool enableWritePrefetching() const { return false; }
701 virtual bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; }
702
704 unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
706 TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
707 TTI::TargetCostKind CostKind, std::optional<FastMathFlags> FMF) const {
709 }
710
711 virtual unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; }
712
714 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
716 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) const {
717 // Widenable conditions will eventually lower into constants, so some
718 // operations with them will be trivially optimized away.
719 auto IsWidenableCondition = [](const Value *V) {
720 if (auto *II = dyn_cast<IntrinsicInst>(V))
721 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition)
722 return true;
723 return false;
724 };
725 // FIXME: A number of transformation tests seem to require these values,
726 // which seems a little odd given how arbitrary they are.
727 switch (Opcode) {
728 default:
729 break;
730 case Instruction::FDiv:
731 case Instruction::FRem:
732 case Instruction::SDiv:
733 case Instruction::SRem:
734 case Instruction::UDiv:
735 case Instruction::URem:
736 // FIXME: Unlikely to be true for CodeSize.
737 return TTI::TCC_Expensive;
738 case Instruction::And:
739 case Instruction::Or:
740 if (any_of(Args, IsWidenableCondition))
741 return TTI::TCC_Free;
742 break;
743 }
744
745 // Assume a 3cy latency for fp arithmetic ops.
747 if (Ty->getScalarType()->isFloatingPointTy())
748 return 3;
749
750 return 1;
751 }
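  // Illustrative results of the default model above (TCC_Free = 0,
  // TCC_Basic = 1, TCC_Expensive = 4):
  //   add i32 %a, %b      -> 1
  //   fadd float %a, %b   -> 3 (assumed FP latency)
  //   udiv i32 %a, %b     -> 4 (TCC_Expensive)
  //   %wc = call i1 @llvm.experimental.widenable.condition()
  //   and i1 %c, %wc      -> 0 (TCC_Free)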
752
753 virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0,
754 unsigned Opcode1,
755 const SmallBitVector &OpcodeMask,
758 }
759
760 virtual InstructionCost
763 VectorType *SubTp, ArrayRef<const Value *> Args = {},
764 const Instruction *CxtI = nullptr) const {
765 return 1;
766 }
767
768 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
769 Type *Src, TTI::CastContextHint CCH,
771 const Instruction *I) const {
772 switch (Opcode) {
773 default:
774 break;
775 case Instruction::IntToPtr: {
776 unsigned SrcSize = Src->getScalarSizeInBits();
777 if (DL.isLegalInteger(SrcSize) &&
778 SrcSize <= DL.getPointerTypeSizeInBits(Dst))
779 return 0;
780 break;
781 }
782 case Instruction::PtrToAddr: {
783 unsigned DstSize = Dst->getScalarSizeInBits();
784 assert(DstSize == DL.getAddressSizeInBits(Src));
785 if (DL.isLegalInteger(DstSize))
786 return 0;
787 break;
788 }
789 case Instruction::PtrToInt: {
790 unsigned DstSize = Dst->getScalarSizeInBits();
791 if (DL.isLegalInteger(DstSize) &&
792 DstSize >= DL.getPointerTypeSizeInBits(Src))
793 return 0;
794 break;
795 }
796 case Instruction::BitCast:
797 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy()))
798 // Identity and pointer-to-pointer casts are free.
799 return 0;
800 break;
801 case Instruction::Trunc: {
802 // trunc to a native type is free (assuming the target has compare and
803 // shift-right of the same width).
804 TypeSize DstSize = DL.getTypeSizeInBits(Dst);
805 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue()))
806 return 0;
807 break;
808 }
809 }
810 return 1;
811 }
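  // Illustrative results under the default model, assuming a target whose
  // DataLayout declares i32/i64 legal and uses 64-bit pointers:
  //   trunc i64 %x to i32      -> 0 (truncation to a native width is free)
  //   ptrtoint ptr %p to i64   -> 0 (wide enough and a legal integer)
  //   inttoptr i32 %x to ptr   -> 0 (legal integer no wider than a pointer)
  //   sitofp i32 %x to double  -> 1 (falls through to the generic cost)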
812
813 virtual InstructionCost
814 getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
815 unsigned Index, TTI::TargetCostKind CostKind) const {
816 return 1;
817 }
818
819 virtual InstructionCost getCFInstrCost(unsigned Opcode,
821 const Instruction *I = nullptr) const {
822 // A phi would be free, unless we're costing the throughput because it
823 // will require a register.
824 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput)
825 return 0;
826 return 1;
827 }
828
830 unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
832 TTI::OperandValueInfo Op2Info, const Instruction *I) const {
833 return 1;
834 }
835
837 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
838 const Value *Op0, const Value *Op1,
840 return 1;
841 }
842
843 /// \param ScalarUserAndIdx encodes the information about extracts from a
844 /// vector, with 'Scalar' being the value being extracted, 'User' being the
845 /// user of the extract (nullptr if the user is not known before
846 /// vectorization) and 'Idx' being the extract lane.
848 unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
849 Value *Scalar,
850 ArrayRef<std::tuple<Value *, User *, int>> ScalarUserAndIdx,
852 return 1;
853 }
854
857 unsigned Index,
859 return 1;
860 }
861
862 virtual InstructionCost
865 unsigned Index) const {
866 return 1;
867 }
868
869 virtual InstructionCost
870 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
871 const APInt &DemandedDstElts,
873 return 1;
874 }
875
876 virtual InstructionCost
879 // Note: The `insertvalue` cost here is chosen to match the default case of
880 // getInstructionCost() -- as prior to adding this helper `insertvalue` was
881 // not handled.
882 if (Opcode == Instruction::InsertValue &&
884 return TTI::TCC_Basic;
885 return TTI::TCC_Free;
886 }
887
888 virtual InstructionCost
889 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
891 TTI::OperandValueInfo OpInfo, const Instruction *I) const {
892 return 1;
893 }
894
896 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
897 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
898 bool UseMaskForCond, bool UseMaskForGaps) const {
899 return 1;
900 }
901
902 virtual InstructionCost
905 switch (ICA.getID()) {
906 default:
907 break;
908 case Intrinsic::allow_runtime_check:
909 case Intrinsic::allow_ubsan_check:
910 case Intrinsic::annotation:
911 case Intrinsic::assume:
912 case Intrinsic::sideeffect:
913 case Intrinsic::pseudoprobe:
914 case Intrinsic::arithmetic_fence:
915 case Intrinsic::dbg_assign:
916 case Intrinsic::dbg_declare:
917 case Intrinsic::dbg_value:
918 case Intrinsic::dbg_label:
919 case Intrinsic::invariant_start:
920 case Intrinsic::invariant_end:
921 case Intrinsic::launder_invariant_group:
922 case Intrinsic::strip_invariant_group:
923 case Intrinsic::is_constant:
924 case Intrinsic::lifetime_start:
925 case Intrinsic::lifetime_end:
926 case Intrinsic::experimental_noalias_scope_decl:
927 case Intrinsic::objectsize:
928 case Intrinsic::ptr_annotation:
929 case Intrinsic::var_annotation:
930 case Intrinsic::experimental_gc_result:
931 case Intrinsic::experimental_gc_relocate:
932 case Intrinsic::coro_alloc:
933 case Intrinsic::coro_begin:
934 case Intrinsic::coro_begin_custom_abi:
935 case Intrinsic::coro_free:
936 case Intrinsic::coro_end:
937 case Intrinsic::coro_frame:
938 case Intrinsic::coro_size:
939 case Intrinsic::coro_align:
940 case Intrinsic::coro_suspend:
941 case Intrinsic::coro_subfn_addr:
942 case Intrinsic::threadlocal_address:
943 case Intrinsic::experimental_widenable_condition:
944 case Intrinsic::ssa_copy:
945 // These intrinsics don't actually represent code after lowering.
946 return 0;
947 }
948 return 1;
949 }
950
951 virtual InstructionCost
954 switch (MICA.getID()) {
955 case Intrinsic::masked_scatter:
956 case Intrinsic::masked_gather:
957 case Intrinsic::masked_load:
958 case Intrinsic::masked_store:
959 case Intrinsic::vp_scatter:
960 case Intrinsic::vp_gather:
961 case Intrinsic::masked_compressstore:
962 case Intrinsic::masked_expandload:
963 return 1;
964 }
966 }
967
971 return 1;
972 }
973
974 // Assume that we have a register of the right size for the type.
975 virtual unsigned getNumberOfParts(Type *Tp) const { return 1; }
976
979 const SCEV *,
980 TTI::TargetCostKind) const {
981 return 0;
982 }
983
984 virtual InstructionCost
986 std::optional<FastMathFlags> FMF,
987 TTI::TargetCostKind) const {
988 return 1;
989 }
990
996
997 virtual InstructionCost
998 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
999 VectorType *Ty, std::optional<FastMathFlags> FMF,
1001 return 1;
1002 }
1003
1004 virtual InstructionCost
1005 getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy,
1007 return 1;
1008 }
1009
1010 virtual InstructionCost
1012 return 0;
1013 }
1014
1016 MemIntrinsicInfo &Info) const {
1017 return false;
1018 }
1019
1020 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const {
1021 // Note for overrides: You must ensure for all element unordered-atomic
1022 // memory intrinsics that all power-of-2 element sizes up to, and
1023 // including, the return value of this method have a corresponding
1024 // runtime lib call. These runtime lib call definitions can be found
1025 // in RuntimeLibcalls.h
1026 return 0;
1027 }
1028
1029 virtual Value *
1031 bool CanCreate = true) const {
1032 return nullptr;
1033 }
1034
1035 virtual Type *
1037 unsigned SrcAddrSpace, unsigned DestAddrSpace,
1038 Align SrcAlign, Align DestAlign,
1039 std::optional<uint32_t> AtomicElementSize) const {
1040 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8)
1041 : Type::getInt8Ty(Context);
1042 }
1043
1045 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1046 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1047 Align SrcAlign, Align DestAlign,
1048 std::optional<uint32_t> AtomicCpySize) const {
1049 unsigned OpSizeInBytes = AtomicCpySize.value_or(1);
1050 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8);
1051 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes)
1052 OpsOut.push_back(OpType);
1053 }
1054
1055 virtual bool areInlineCompatible(const Function *Caller,
1056 const Function *Callee) const {
1057 return (Caller->getFnAttribute("target-cpu") ==
1058 Callee->getFnAttribute("target-cpu")) &&
1059 (Caller->getFnAttribute("target-features") ==
1060 Callee->getFnAttribute("target-features"));
1061 }
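  // Illustrative sketch: with this default, inlining is permitted only when
  // the string attributes match exactly, e.g.
  //   caller: "target-cpu"="generic" "target-features"="+feature-a"
  //   callee: "target-cpu"="generic" "target-features"="+feature-a"  -> compatible
  //   callee: "target-cpu"="generic" "target-features"="+feature-b"  -> not compatible
  // (feature names here are placeholders, not real subtarget features).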
1062
1063 virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call,
1064 unsigned DefaultCallPenalty) const {
1065 return DefaultCallPenalty;
1066 }
1067
1068 virtual bool areTypesABICompatible(const Function *Caller,
1069 const Function *Callee,
1070 ArrayRef<Type *> Types) const {
1071 return (Caller->getFnAttribute("target-cpu") ==
1072 Callee->getFnAttribute("target-cpu")) &&
1073 (Caller->getFnAttribute("target-features") ==
1074 Callee->getFnAttribute("target-features"));
1075 }
1076
1078 return false;
1079 }
1080
1082 return false;
1083 }
1084
1085 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
1086 return 128;
1087 }
1088
1089 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
1090
1091 virtual bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
1092
1093 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1094 Align Alignment,
1095 unsigned AddrSpace) const {
1096 return true;
1097 }
1098
1099 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1100 Align Alignment,
1101 unsigned AddrSpace) const {
1102 return true;
1103 }
1104
1106 ElementCount VF) const {
1107 return true;
1108 }
1109
1111 return true;
1112 }
1113
1114 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1115 unsigned ChainSizeInBytes,
1116 VectorType *VecTy) const {
1117 return VF;
1118 }
1119
1120 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1121 unsigned ChainSizeInBytes,
1122 VectorType *VecTy) const {
1123 return VF;
1124 }
1125
1126 virtual bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const {
1127 return false;
1128 }
1129
1130 virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const {
1131 return false;
1132 }
1133 virtual bool preferAlternateOpcodeVectorization() const { return true; }
1134
1135 virtual bool preferPredicatedReductionSelect() const { return false; }
1136
1137 virtual bool preferEpilogueVectorization() const { return true; }
1138
1139 virtual bool shouldConsiderVectorizationRegPressure() const { return false; }
1140
1141 virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
1142 return true;
1143 }
1144
1145 virtual TTI::ReductionShuffle
1149
1150 virtual unsigned getGISelRematGlobalCost() const { return 1; }
1151
1152 virtual unsigned getMinTripCountTailFoldingThreshold() const { return 0; }
1153
1154 virtual bool supportsScalableVectors() const { return false; }
1155
1156 virtual bool enableScalableVectorization() const { return false; }
1157
1158 virtual bool hasActiveVectorLength() const { return false; }
1159
1161 SmallVectorImpl<Use *> &Ops) const {
1162 return false;
1163 }
1164
1165 virtual bool isVectorShiftByScalarCheap(Type *Ty) const { return false; }
1166
1173
1174 virtual bool hasArmWideBranch(bool) const { return false; }
1175
1176 virtual APInt getFeatureMask(const Function &F) const {
1177 return APInt::getZero(32);
1178 }
1179
1180 virtual APInt getPriorityMask(const Function &F) const {
1181 return APInt::getZero(32);
1182 }
1183
1184 virtual bool isMultiversionedFunction(const Function &F) const {
1185 return false;
1186 }
1187
1188 virtual unsigned getMaxNumArgs() const { return UINT_MAX; }
1189
1190 virtual unsigned getNumBytesToPadGlobalArray(unsigned Size,
1191 Type *ArrayType) const {
1192 return 0;
1193 }
1194
1196 const Function &F,
1197 SmallVectorImpl<std::pair<StringRef, int64_t>> &LB) const {}
1198
1199 virtual bool allowVectorElementIndexingUsingGEP() const { return true; }
1200
1201protected:
1202 // Obtain the minimum required size to hold the value (without the sign).
1203 // In case of a vector it returns the min required size for one element.
1204 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const {
1205 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
1206 const auto *VectorValue = cast<Constant>(Val);
1207
1208 // In case of a vector we need to pick the max of the min required
1209 // sizes of the individual elements.
1210 auto *VT = cast<FixedVectorType>(Val->getType());
1211
1212 // Assume unsigned elements
1213 isSigned = false;
1214
1215 // The max required size is the size of the vector element type
1216 unsigned MaxRequiredSize =
1217 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue();
1218
1219 unsigned MinRequiredSize = 0;
1220 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
1221 if (auto *IntElement =
1222 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
1223 bool signedElement = IntElement->getValue().isNegative();
1224 // Get the element min required size.
1225 unsigned ElementMinRequiredSize =
1226 IntElement->getValue().getSignificantBits() - 1;
1227 // In case one element is signed then all the vector is signed.
1228 isSigned |= signedElement;
1229 // Save the max required bit size between all the elements.
1230 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
1231 } else {
1232 // not an int constant element
1233 return MaxRequiredSize;
1234 }
1235 }
1236 return MinRequiredSize;
1237 }
1238
1239 if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
1240 isSigned = CI->getValue().isNegative();
1241 return CI->getValue().getSignificantBits() - 1;
1242 }
1243
1244 if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
1245 isSigned = true;
1246 return Cast->getSrcTy()->getScalarSizeInBits() - 1;
1247 }
1248
1249 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
1250 isSigned = false;
1251 return Cast->getSrcTy()->getScalarSizeInBits();
1252 }
1253
1254 isSigned = false;
1255 return Val->getType()->getScalarSizeInBits();
1256 }
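  // Worked example (illustrative): for the constant vector
  //   <4 x i16> <i16 1, i16 2, i16 -3, i16 4>
  // the per-element sizes without the sign bit are 1, 2, 2 and 3 bits; one
  // element is negative, so the helper reports isSigned = true and returns 3.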
1257
1258 bool isStridedAccess(const SCEV *Ptr) const {
1259 return Ptr && isa<SCEVAddRecExpr>(Ptr);
1260 }
1261
1263 const SCEV *Ptr) const {
1264 if (!isStridedAccess(Ptr))
1265 return nullptr;
1266 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
1267 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
1268 }
1269
1271 int64_t MergeDistance) const {
1272 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
1273 if (!Step)
1274 return false;
1275 APInt StrideVal = Step->getAPInt();
1276 if (StrideVal.getBitWidth() > 64)
1277 return false;
1278 // FIXME: Need to take absolute value for negative stride case.
1279 return StrideVal.getSExtValue() < MergeDistance;
1280 }
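  // Illustrative usage (hypothetical loop): for a pointer whose SCEV is the
  // affine recurrence {%base,+,16}, getConstantStrideStep() yields the
  // constant 16, so isConstantStridedAccessLessThan(SE, Ptr, 64) is true; a
  // pointer with a variable or non-affine stride has no constant step and the
  // query conservatively returns false.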
1281};
1282
1283/// CRTP base class for use as a mix-in that aids implementing
1284/// a TargetTransformInfo-compatible class.
1285template <typename T>
1286class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
1287private:
1288 typedef TargetTransformInfoImplBase BaseT;
1289
1290protected:
1292
1293public:
1294 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1295 ArrayRef<const Value *> Operands, Type *AccessType,
1296 TTI::TargetCostKind CostKind) const override {
1297 assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
1298 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
1299 bool HasBaseReg = (BaseGV == nullptr);
1300
1301 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
1302 APInt BaseOffset(PtrSizeBits, 0);
1303 int64_t Scale = 0;
1304
1305 auto GTI = gep_type_begin(PointeeType, Operands);
1306 Type *TargetType = nullptr;
1307
1308 // Handle the case where the GEP instruction has a single operand,
1309 // the base pointer, in which case TargetType stays nullptr.
1310 if (Operands.empty())
1311 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
1312
1313 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
1314 TargetType = GTI.getIndexedType();
1315 // We assume that the cost of Scalar GEP with constant index and the
1316 // cost of Vector GEP with splat constant index are the same.
1317 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
1318 if (!ConstIdx)
1319 if (auto Splat = getSplatValue(*I))
1320 ConstIdx = dyn_cast<ConstantInt>(Splat);
1321 if (StructType *STy = GTI.getStructTypeOrNull()) {
1322 // For structures the index is always splat or scalar constant
1323 assert(ConstIdx && "Unexpected GEP index");
1324 uint64_t Field = ConstIdx->getZExtValue();
1325 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
1326 } else {
1327 // If this operand is a scalable type, bail out early.
1328 // TODO: Make isLegalAddressingMode TypeSize aware.
1329 if (TargetType->isScalableTy())
1330 return TTI::TCC_Basic;
1331 int64_t ElementSize =
1332 GTI.getSequentialElementStride(DL).getFixedValue();
1333 if (ConstIdx) {
1334 BaseOffset +=
1335 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
1336 } else {
1337 // Needs scale register.
1338 if (Scale != 0)
1339 // No addressing mode takes two scale registers.
1340 return TTI::TCC_Basic;
1341 Scale = ElementSize;
1342 }
1343 }
1344 }
1345
1346 // If we haven't been provided a hint, use the target type for now.
1347 //
1348 // TODO: Take a look at potentially removing this: This is *slightly* wrong
1349 // as it's possible to have a GEP with a foldable target type but a memory
1350 // access that isn't foldable. For example, this load isn't foldable on
1351 // RISC-V:
1352 //
1353 // %p = getelementptr i32, ptr %base, i32 42
1354 // %x = load <2 x i32>, ptr %p
1355 if (!AccessType)
1356 AccessType = TargetType;
1357
1358 // If the final address of the GEP is a legal addressing mode for the given
1359 // access type, then we can fold it into its users.
1360 if (static_cast<const T *>(this)->isLegalAddressingMode(
1361 AccessType, const_cast<GlobalValue *>(BaseGV),
1362 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
1364 return TTI::TCC_Free;
1365
1366 // TODO: Instead of returning TCC_Basic here, we should use
1367 // getArithmeticInstrCost. Or better yet, provide a hook to let the target
1368 // model it.
1369 return TTI::TCC_Basic;
1370 }
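  // Illustrative example (the outcome is target-dependent): for
  //   %p = getelementptr inbounds i32, ptr %base, i64 3
  // the loop above computes BaseOffset = 12 and Scale = 0; if the target
  // accepts [reg + 12] as a legal addressing mode for the access type the GEP
  // is costed as TCC_Free, otherwise it falls back to TCC_Basic.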
1371
1374 const TTI::PointersChainInfo &Info, Type *AccessTy,
1375 TTI::TargetCostKind CostKind) const override {
1376 InstructionCost Cost = TTI::TCC_Free;
1377 // In the basic model we only take GEP instructions into account (although
1378 // an alloca instruction, a plain value, constants and/or constant
1379 // expressions, PHIs, bitcasts, or anything else allowed to be used as a
1380 // pointer may appear here). Typically, if Base is not a GEP instruction and
1381 // all the pointers are relative to the same base address, the rest are
1382 // either GEP instructions, PHIs, bitcasts or constants. When they share the
1383 // same base, we simply cost each non-Base GEP as an ADD operation if any of
1384 // its indices is non-constant.
1385 // If there are no known dependencies between the pointers, the cost is
1386 // calculated as the sum of the costs of the GEP instructions.
1387 for (const Value *V : Ptrs) {
1388 const auto *GEP = dyn_cast<GetElementPtrInst>(V);
1389 if (!GEP)
1390 continue;
1391 if (Info.isSameBase() && V != Base) {
1392 if (GEP->hasAllConstantIndices())
1393 continue;
1394 Cost += static_cast<const T *>(this)->getArithmeticInstrCost(
1395 Instruction::Add, GEP->getType(), CostKind,
1396 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None},
1397 {});
1398 } else {
1399 SmallVector<const Value *> Indices(GEP->indices());
1400 Cost += static_cast<const T *>(this)->getGEPCost(
1401 GEP->getSourceElementType(), GEP->getPointerOperand(), Indices,
1402 AccessTy, CostKind);
1403 }
1404 }
1405 return Cost;
1406 }
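  // Illustrative example: for a same-base chain such as
  //   %p1 = getelementptr i8, ptr %base, i64 4   ; all-constant, skipped
  //   %p2 = getelementptr i8, ptr %base, i64 %i  ; variable index
  // only %p2 contributes, costed as a scalar Add; without a known common base
  // each GEP is costed individually via getGEPCost.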
1407
1410 TTI::TargetCostKind CostKind) const override {
1411 using namespace llvm::PatternMatch;
1412
1413 auto *TargetTTI = static_cast<const T *>(this);
1414 // Handle non-intrinsic calls, invokes, and callbr.
1415 // FIXME: Unlikely to be true for anything but CodeSize.
1416 auto *CB = dyn_cast<CallBase>(U);
1417 if (CB && !isa<IntrinsicInst>(U)) {
1418 if (const Function *F = CB->getCalledFunction()) {
1419 if (!TargetTTI->isLoweredToCall(F))
1420 return TTI::TCC_Basic; // Give a basic cost if it will be lowered
1421
1422 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1);
1423 }
1424 // For indirect or other calls, scale cost by number of arguments.
1425 return TTI::TCC_Basic * (CB->arg_size() + 1);
1426 }
1427
1428 Type *Ty = U->getType();
1429 unsigned Opcode = Operator::getOpcode(U);
1430 auto *I = dyn_cast<Instruction>(U);
1431 switch (Opcode) {
1432 default:
1433 break;
1434 case Instruction::Call: {
1435 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call");
1436 auto *Intrinsic = cast<IntrinsicInst>(U);
1437 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB);
1438 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind);
1439 }
1440 case Instruction::Br:
1441 case Instruction::Ret:
1442 case Instruction::PHI:
1443 case Instruction::Switch:
1444 return TargetTTI->getCFInstrCost(Opcode, CostKind, I);
1445 case Instruction::Freeze:
1446 return TTI::TCC_Free;
1447 case Instruction::ExtractValue:
1448 case Instruction::InsertValue:
1449 return TargetTTI->getInsertExtractValueCost(Opcode, CostKind);
1450 case Instruction::Alloca:
1451 if (cast<AllocaInst>(U)->isStaticAlloca())
1452 return TTI::TCC_Free;
1453 break;
1454 case Instruction::GetElementPtr: {
1455 const auto *GEP = cast<GEPOperator>(U);
1456 Type *AccessType = nullptr;
1457 // For now, only provide the AccessType in the simple case where the GEP
1458 // only has one user.
1459 if (GEP->hasOneUser() && I)
1460 AccessType = I->user_back()->getAccessType();
1461
1462 return TargetTTI->getGEPCost(GEP->getSourceElementType(),
1463 Operands.front(), Operands.drop_front(),
1464 AccessType, CostKind);
1465 }
1466 case Instruction::Add:
1467 case Instruction::FAdd:
1468 case Instruction::Sub:
1469 case Instruction::FSub:
1470 case Instruction::Mul:
1471 case Instruction::FMul:
1472 case Instruction::UDiv:
1473 case Instruction::SDiv:
1474 case Instruction::FDiv:
1475 case Instruction::URem:
1476 case Instruction::SRem:
1477 case Instruction::FRem:
1478 case Instruction::Shl:
1479 case Instruction::LShr:
1480 case Instruction::AShr:
1481 case Instruction::And:
1482 case Instruction::Or:
1483 case Instruction::Xor:
1484 case Instruction::FNeg: {
1485 const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]);
1486 TTI::OperandValueInfo Op2Info;
1487 if (Opcode != Instruction::FNeg)
1488 Op2Info = TTI::getOperandInfo(Operands[1]);
1489 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
1490 Op2Info, Operands, I);
1491 }
1492 case Instruction::IntToPtr:
1493 case Instruction::PtrToAddr:
1494 case Instruction::PtrToInt:
1495 case Instruction::SIToFP:
1496 case Instruction::UIToFP:
1497 case Instruction::FPToUI:
1498 case Instruction::FPToSI:
1499 case Instruction::Trunc:
1500 case Instruction::FPTrunc:
1501 case Instruction::BitCast:
1502 case Instruction::FPExt:
1503 case Instruction::SExt:
1504 case Instruction::ZExt:
1505 case Instruction::AddrSpaceCast: {
1506 Type *OpTy = Operands[0]->getType();
1507 return TargetTTI->getCastInstrCost(
1508 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I);
1509 }
1510 case Instruction::Store: {
1511 auto *SI = cast<StoreInst>(U);
1512 Type *ValTy = Operands[0]->getType();
1513 TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]);
1514 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(),
1515 SI->getPointerAddressSpace(), CostKind,
1516 OpInfo, I);
1517 }
1518 case Instruction::Load: {
1519 // FIXME: Arbitrary cost which could come from the backend.
1521 return 4;
1522 auto *LI = cast<LoadInst>(U);
1523 Type *LoadType = U->getType();
1524 // If there is a non-register sized type, the cost estimation may expand
1525 // it to be several instructions to load into multiple registers on the
1526 // target. But, if the only use of the load is a trunc instruction to a
1527 // register sized type, the instruction selector can combine these
1528 // instructions to be a single load. So, in this case, we use the
1529 // destination type of the trunc instruction rather than the load to
1530 // accurately estimate the cost of this load instruction.
1531 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() &&
1532 !LoadType->isVectorTy()) {
1533 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin()))
1534 LoadType = TI->getDestTy();
1535 }
1536 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(),
1538 {TTI::OK_AnyValue, TTI::OP_None}, I);
1539 }
1540 case Instruction::Select: {
1541 const Value *Op0, *Op1;
1542 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) ||
1543 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) {
1544 // select x, y, false --> x & y
1545 // select x, true, y --> x | y
1546 const auto Op1Info = TTI::getOperandInfo(Op0);
1547 const auto Op2Info = TTI::getOperandInfo(Op1);
1548 assert(Op0->getType()->getScalarSizeInBits() == 1 &&
1549 Op1->getType()->getScalarSizeInBits() == 1);
1550
1551 SmallVector<const Value *, 2> Operands{Op0, Op1};
1552 return TargetTTI->getArithmeticInstrCost(
1553 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty,
1554 CostKind, Op1Info, Op2Info, Operands, I);
1555 }
1556 const auto Op1Info = TTI::getOperandInfo(Operands[1]);
1557 const auto Op2Info = TTI::getOperandInfo(Operands[2]);
1558 Type *CondTy = Operands[0]->getType();
1559 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy,
1561 CostKind, Op1Info, Op2Info, I);
1562 }
1563 case Instruction::ICmp:
1564 case Instruction::FCmp: {
1565 const auto Op1Info = TTI::getOperandInfo(Operands[0]);
1566 const auto Op2Info = TTI::getOperandInfo(Operands[1]);
1567 Type *ValTy = Operands[0]->getType();
1568 // TODO: Also handle ICmp/FCmp constant expressions.
1569 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(),
1570 I ? cast<CmpInst>(I)->getPredicate()
1572 CostKind, Op1Info, Op2Info, I);
1573 }
1574 case Instruction::InsertElement: {
1575 auto *IE = dyn_cast<InsertElementInst>(U);
1576 if (!IE)
1577 return TTI::TCC_Basic; // FIXME
1578 unsigned Idx = -1;
1579 if (auto *CI = dyn_cast<ConstantInt>(Operands[2]))
1580 if (CI->getValue().getActiveBits() <= 32)
1581 Idx = CI->getZExtValue();
1582 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx,
1584 }
1585 case Instruction::ShuffleVector: {
1586 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U);
1587 if (!Shuffle)
1588 return TTI::TCC_Basic; // FIXME
1589
1590 auto *VecTy = cast<VectorType>(U->getType());
1591 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType());
1592 ArrayRef<int> Mask = Shuffle->getShuffleMask();
1593 int NumSubElts, SubIndex;
1594
1595 // Treat undef/poison mask as free (no matter the length).
1596 if (all_of(Mask, [](int M) { return M < 0; }))
1597 return TTI::TCC_Free;
1598
1599 // TODO: move more of this inside improveShuffleKindFromMask.
1600 if (Shuffle->changesLength()) {
1601 // Treat a 'subvector widening' as a free shuffle.
1602 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding())
1603 return TTI::TCC_Free;
1604
1605 if (Shuffle->isExtractSubvectorMask(SubIndex))
1606 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecTy,
1607 VecSrcTy, Mask, CostKind, SubIndex,
1608 VecTy, Operands, Shuffle);
1609
1610 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1611 return TargetTTI->getShuffleCost(
1612 TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind,
1613 SubIndex,
1614 FixedVectorType::get(VecTy->getScalarType(), NumSubElts),
1615 Operands, Shuffle);
1616
1617 int ReplicationFactor, VF;
1618 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) {
1619 APInt DemandedDstElts = APInt::getZero(Mask.size());
1620 for (auto I : enumerate(Mask)) {
1621 if (I.value() != PoisonMaskElem)
1622 DemandedDstElts.setBit(I.index());
1623 }
1624 return TargetTTI->getReplicationShuffleCost(
1625 VecSrcTy->getElementType(), ReplicationFactor, VF,
1626 DemandedDstElts, CostKind);
1627 }
1628
1629 bool IsUnary = isa<UndefValue>(Operands[1]);
1630 NumSubElts = VecSrcTy->getElementCount().getKnownMinValue();
1631 SmallVector<int, 16> AdjustMask(Mask);
1632
1633 // Widening shuffle - widening the source(s) to the new length
1634 // (treated as free - see above), and then perform the adjusted
1635 // shuffle at that width.
1636 if (Shuffle->increasesLength()) {
1637 for (int &M : AdjustMask)
1638 M = M >= NumSubElts ? (M + (Mask.size() - NumSubElts)) : M;
1639
1640 return TargetTTI->getShuffleCost(
1642 VecTy, AdjustMask, CostKind, 0, nullptr, Operands, Shuffle);
1643 }
1644
1645 // Narrowing shuffle - perform shuffle at original wider width and
1646 // then extract the lower elements.
1647 // FIXME: This can assume widening, which is not true of all vector
1648 // architectures (and is not even the default).
1649 AdjustMask.append(NumSubElts - Mask.size(), PoisonMaskElem);
1650
1651 InstructionCost ShuffleCost = TargetTTI->getShuffleCost(
1653 VecSrcTy, VecSrcTy, AdjustMask, CostKind, 0, nullptr, Operands,
1654 Shuffle);
1655
1656 SmallVector<int, 16> ExtractMask(Mask.size());
1657 std::iota(ExtractMask.begin(), ExtractMask.end(), 0);
1658 return ShuffleCost + TargetTTI->getShuffleCost(
1659 TTI::SK_ExtractSubvector, VecTy, VecSrcTy,
1660 ExtractMask, CostKind, 0, VecTy, {}, Shuffle);
1661 }
1662
1663 if (Shuffle->isIdentity())
1664 return TTI::TCC_Free;
1665
1666 if (Shuffle->isReverse())
1667 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, VecSrcTy, Mask,
1668 CostKind, 0, nullptr, Operands,
1669 Shuffle);
1670
1671 if (Shuffle->isTranspose())
1672 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, VecSrcTy,
1673 Mask, CostKind, 0, nullptr, Operands,
1674 Shuffle);
1675
1676 if (Shuffle->isZeroEltSplat())
1677 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, VecSrcTy,
1678 Mask, CostKind, 0, nullptr, Operands,
1679 Shuffle);
1680
1681 if (Shuffle->isSingleSource())
1682 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy,
1683 VecSrcTy, Mask, CostKind, 0, nullptr,
1684 Operands, Shuffle);
1685
1686 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex))
1687 return TargetTTI->getShuffleCost(
1688 TTI::SK_InsertSubvector, VecTy, VecSrcTy, Mask, CostKind, SubIndex,
1689 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), Operands,
1690 Shuffle);
1691
1692 if (Shuffle->isSelect())
1693 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, VecSrcTy, Mask,
1694 CostKind, 0, nullptr, Operands,
1695 Shuffle);
1696
1697 if (Shuffle->isSplice(SubIndex))
1698 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, VecSrcTy, Mask,
1699 CostKind, SubIndex, nullptr, Operands,
1700 Shuffle);
1701
1702 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, VecSrcTy,
1703 Mask, CostKind, 0, nullptr, Operands,
1704 Shuffle);
1705 }
1706 case Instruction::ExtractElement: {
1707 auto *EEI = dyn_cast<ExtractElementInst>(U);
1708 if (!EEI)
1709 return TTI::TCC_Basic; // FIXME
1710 unsigned Idx = -1;
1711 if (auto *CI = dyn_cast<ConstantInt>(Operands[1]))
1712 if (CI->getValue().getActiveBits() <= 32)
1713 Idx = CI->getZExtValue();
1714 Type *DstTy = Operands[0]->getType();
1715 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx);
1716 }
1717 }
1718
1719 // By default, just classify everything remaining as 'basic'.
1720 return TTI::TCC_Basic;
1721 }
1722
1724 auto *TargetTTI = static_cast<const T *>(this);
1725 SmallVector<const Value *, 4> Ops(I->operand_values());
1726 InstructionCost Cost = TargetTTI->getInstructionCost(
1729 }
1730
1731 bool supportsTailCallFor(const CallBase *CB) const override {
1732 return static_cast<const T *>(this)->supportsTailCalls();
1733 }
1734};
1735} // namespace llvm
1736
1737#endif
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
Hexagon Common GEP
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
Class for arbitrary precision integers.
Definition APInt.h:78
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
LLVM_ABI APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition APInt.cpp:1052
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1571
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
iterator end() const
Definition ArrayRef.h:131
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Class to represent array types.
A cache of @llvm.assume calls within a function.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:22
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
The core instruction combiner logic.
static InstructionCost getInvalid(CostType Val=0)
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
Information for memory intrinsic cost model.
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
The optimization diagnostic interface.
Analysis providing profile information.
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
This class represents a constant integer value.
const APInt & getAPInt() const
This class represents an analyzed expression in the program.
The main scalar evolution driver.
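The SCEV entries above are typically combined to recover a constant pointer stride, as in the hedged sketch below; the helper name is hypothetical, and a real caller would live inside a pass that already owns a ScalarEvolution result.

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include <cstdint>
#include <optional>

using namespace llvm;

static std::optional<int64_t> getConstantStride(ScalarEvolution &SE, Value *Ptr) {
  const SCEV *PtrSCEV = SE.getSCEV(Ptr);
  const auto *AddRec = dyn_cast<SCEVAddRecExpr>(PtrSCEV);
  if (!AddRec)
    return std::nullopt;                     // Not an affine recurrence.
  const SCEV *Step = AddRec->getStepRecurrence(SE);
  if (const auto *C = dyn_cast<SCEVConstant>(Step))
    if (C->getAPInt().getBitWidth() <= 64)
      return C->getAPInt().getSExtValue();   // Constant stride in bytes.
  return std::nullopt;
}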
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is small.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
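A tiny sketch of the two StackOffset factories listed above. The point of the type is that the fixed and vscale-scaled parts of a byte offset are kept separate, which matters once scalable vectors live on the stack; the function is illustrative only.

#include "llvm/Support/TypeSize.h"

using namespace llvm;

static void describeOffsets() {
  // 16 fixed bytes, e.g. a slot in the fixed part of the frame.
  StackOffset FixedPart = StackOffset::getFixed(16);
  // 32 * vscale bytes, e.g. a slot holding a scalable vector.
  StackOffset ScalablePart = StackOffset::getScalable(32);
  (void)FixedPart;
  (void)ScalablePart;
}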
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Class to represent struct types.
Multiway switch.
Provides information about what library functions are available for the current target.
virtual InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const
virtual bool preferAlternateOpcodeVectorization() const
virtual bool isProfitableLSRChainElement(Instruction *I) const
virtual unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const
virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const
virtual InstructionCost getCostOfKeepingLiveOverCall(ArrayRef< Type * > Tys) const
virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const
virtual InstructionCost getMulAccReductionCost(bool IsUnsigned, unsigned RedOpcode, Type *ResTy, VectorType *Ty, TTI::TargetCostKind CostKind) const
virtual const DataLayout & getDataLayout() const
virtual bool preferFixedOverScalableIfEqualCost(bool IsEpilogue) const
virtual std::optional< unsigned > getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const
virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind) const
virtual bool enableInterleavedAccessVectorization() const
virtual InstructionCost getPartialReductionCost(unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType, ElementCount VF, TTI::PartialReductionExtendKind OpAExtend, TTI::PartialReductionExtendKind OpBExtend, std::optional< unsigned > BinOp, TTI::TargetCostKind CostKind, std::optional< FastMathFlags > FMF) const
virtual InstructionCost getOperandsScalarizationOverhead(ArrayRef< Type * > Tys, TTI::TargetCostKind CostKind, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
virtual InstructionCost getFPOpCost(Type *Ty) const
virtual unsigned getMaxInterleaveFactor(ElementCount VF) const
virtual bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
virtual TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
bool isStridedAccess(const SCEV *Ptr) const
virtual unsigned getAtomicMemIntrinsicMaxElementSize() const
virtual Value * rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, Value *NewV) const
virtual TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) const
virtual std::optional< Value * > simplifyDemandedVectorEltsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3, std::function< void(Instruction *, unsigned, APInt, APInt &)> SimplifyAndSetOp) const
virtual bool isLegalMaskedStore(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind) const
virtual InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *, const SCEV *, TTI::TargetCostKind) const
virtual bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const
virtual bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty) const
virtual unsigned adjustInliningThreshold(const CallBase *CB) const
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
virtual bool shouldDropLSRSolutionIfLessProfitable() const
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const
virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment, unsigned AddressSpace, TTI::MaskKind MaskKind) const
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) const
virtual bool isLegalStridedLoadStore(Type *DataType, Align Alignment) const
virtual unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const
virtual bool isLegalICmpImmediate(int64_t Imm) const
virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo, const Instruction *I) const
virtual bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const
virtual bool haveFastSqrt(Type *Ty) const
virtual ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const
virtual bool collectFlatAddressOperands(SmallVectorImpl< int > &OpIndexes, Intrinsic::ID IID) const
virtual bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, TargetLibraryInfo *LibInfo) const
virtual unsigned getRegisterClassForType(bool Vector, Type *Ty=nullptr) const
virtual std::optional< unsigned > getVScaleForTuning() const
virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, StackOffset BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace) const
virtual unsigned getNumberOfParts(Type *Tp) const
virtual bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, AssumptionCache &AC, TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const
virtual void getPeelingPreferences(Loop *, ScalarEvolution &, TTI::PeelingPreferences &) const
virtual std::optional< Value * > simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known, bool &KnownBitsComputed) const
virtual bool useColdCCForColdCall(Function &F) const
virtual unsigned getNumberOfRegisters(unsigned ClassID) const
virtual bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const
virtual bool isLegalAddScalableImmediate(int64_t Imm) const
virtual bool isLegalInterleavedAccessType(VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace) const
TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg)
virtual bool shouldPrefetchAddressSpace(unsigned AS) const
virtual bool forceScalarizeMaskedScatter(VectorType *DataType, Align Alignment) const
virtual uint64_t getMaxMemIntrinsicInlineSizeThreshold() const
virtual KnownBits computeKnownBitsAddrSpaceCast(unsigned FromAS, unsigned ToAS, const KnownBits &FromPtrBits) const
virtual unsigned getMinVectorRegisterBitWidth() const
unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const
virtual bool shouldBuildLookupTablesForConstant(Constant *C) const
virtual bool isFPVectorizationPotentiallyUnsafe() const
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, ElementCount VF) const
virtual InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind, Instruction *Inst=nullptr) const
virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask) const
virtual InstructionCost getIndexedVectorInstrCostFromEnd(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index) const
virtual std::optional< unsigned > getCacheSize(TargetTransformInfo::CacheLevel Level) const
virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index, TTI::TargetCostKind CostKind) const
virtual bool shouldTreatInstructionLikeSelect(const Instruction *I) const
virtual std::optional< Instruction * > instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const
virtual unsigned getEpilogueVectorizationMinVF() const
virtual std::pair< const Value *, unsigned > getPredicatedAddrSpace(const Value *V) const
virtual bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const
virtual void getMemcpyLoopResidualLoweringType(SmallVectorImpl< Type * > &OpsOut, LLVMContext &Context, unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicCpySize) const
virtual TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const
virtual TTI::AddressingModeKind getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const
virtual bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const
virtual unsigned getMaxPrefetchIterationsAhead() const
virtual bool allowVectorElementIndexingUsingGEP() const
virtual InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const
virtual TTI::ReductionShuffle getPreferredExpandedReductionShuffle(const IntrinsicInst *II) const
const SCEVConstant * getConstantStrideStep(ScalarEvolution *SE, const SCEV *Ptr) const
virtual bool hasBranchDivergence(const Function *F=nullptr) const
virtual InstructionCost getArithmeticReductionCost(unsigned, VectorType *, std::optional< FastMathFlags > FMF, TTI::TargetCostKind) const
virtual bool isProfitableToHoist(Instruction *I) const
virtual const char * getRegisterClassName(unsigned ClassID) const
virtual InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, FastMathFlags, TTI::TargetCostKind) const
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const
virtual bool isTargetIntrinsicTriviallyScalarizable(Intrinsic::ID ID) const
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
virtual InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, const SmallBitVector &OpcodeMask, TTI::TargetCostKind CostKind) const
virtual unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, unsigned DefaultCallPenalty) const
virtual InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
virtual bool isLegalNTStore(Type *DataType, Align Alignment) const
virtual APInt getFeatureMask(const Function &F) const
virtual InstructionCost getMemIntrinsicInstrCost(const MemIntrinsicCostAttributes &MICA, TTI::TargetCostKind CostKind) const
virtual std::optional< unsigned > getMinPageSize() const
virtual unsigned getRegUsageForType(Type *Ty) const
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr, int64_t ScalableOffset=0) const
virtual InstructionCost getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract, TTI::TargetCostKind CostKind, bool ForPoisonSrc=true, ArrayRef< Value * > VL={}, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
virtual bool isElementTypeLegalForScalableVector(Type *Ty) const
virtual bool isLoweredToCall(const Function *F) const
virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, Value *Scalar, ArrayRef< std::tuple< Value *, User *, int > > ScalarUserAndIdx, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
virtual InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, ArrayRef< const Value * > Args, const Instruction *CxtI=nullptr) const
virtual bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty) const
virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const
virtual BranchProbability getPredictableBranchThreshold() const
virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const
virtual InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, const APInt &DemandedDstElts, TTI::TargetCostKind CostKind) const
virtual bool isLegalToVectorizeStore(StoreInst *SI) const
virtual bool areInlineCompatible(const Function *Caller, const Function *Callee) const
virtual bool isTargetIntrinsicWithStructReturnOverloadAtField(Intrinsic::ID ID, int RetIdx) const
virtual bool hasConditionalLoadStoreForType(Type *Ty, bool IsStore) const
virtual bool preferInLoopReduction(RecurKind Kind, Type *Ty) const
virtual bool isMultiversionedFunction(const Function &F) const
virtual InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, const Instruction *I=nullptr) const
virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const
virtual InstructionUniformity getInstructionUniformity(const Value *V) const
virtual bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
virtual bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const
virtual bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) const
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) const
virtual bool isLegalAddImmediate(int64_t Imm) const
virtual InstructionCost getInsertExtractValueCost(unsigned Opcode, TTI::TargetCostKind CostKind) const
virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind, const Instruction *I) const
virtual bool isLegalNTLoad(Type *DataType, Align Alignment) const
virtual InstructionCost getBranchMispredictPenalty() const
virtual bool isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx) const
virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind) const
virtual InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, Type *Ty) const
bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, int64_t MergeDistance) const
virtual Value * getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, Type *ExpectedType, bool CanCreate=true) const
virtual bool enableMaskedInterleavedAccessVectorization() const
virtual std::pair< KnownBits, KnownBits > computeKnownBitsAddrSpaceCast(unsigned ToAS, const Value &PtrOp) const
virtual Type * getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, unsigned SrcAddrSpace, unsigned DestAddrSpace, Align SrcAlign, Align DestAlign, std::optional< uint32_t > AtomicElementSize) const
virtual unsigned getInliningThresholdMultiplier() const
TargetTransformInfoImplBase(const DataLayout &DL)
virtual InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info, const Instruction *I) const
virtual bool shouldExpandReduction(const IntrinsicInst *II) const
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
virtual unsigned getGISelRematGlobalCost() const
virtual InstructionCost getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) const
virtual bool isTypeLegal(Type *Ty) const
virtual unsigned getAssumedAddrSpace(const Value *V) const
virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, Align Alignment, unsigned *Fast) const
virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
virtual InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty, std::optional< FastMathFlags > FMF, TTI::TargetCostKind CostKind) const
virtual unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const
virtual bool areTypesABICompatible(const Function *Caller, const Function *Callee, ArrayRef< Type * > Types) const
virtual unsigned getNumBytesToPadGlobalArray(unsigned Size, Type *ArrayType) const
virtual bool preferToKeepConstantsAttached(const Instruction &Inst, const Function &Fn) const
virtual InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *DstTy, VectorType *SrcTy, ArrayRef< int > Mask, TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr) const
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const
virtual bool supportsTailCallFor(const CallBase *CB) const
virtual std::optional< unsigned > getMaxVScale() const
virtual bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index, const Value *Op0, const Value *Op1, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
virtual bool isTargetIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx) const
virtual bool shouldConsiderVectorizationRegPressure() const
virtual InstructionCost getMemcpyCost(const Instruction *I) const
virtual unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const
virtual bool useFastCCForInternalCall(Function &F) const
virtual TailFoldingStyle getPreferredTailFoldingStyle(bool IVUpdateMayOverflow=true) const
virtual void getUnrollingPreferences(Loop *, ScalarEvolution &, TTI::UnrollingPreferences &, OptimizationRemarkEmitter *) const
TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)=default
virtual bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
virtual bool supportsEfficientVectorElementLoadStore() const
virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, unsigned NumStridedMemAccesses, unsigned NumPrefetches, bool HasCall) const
virtual APInt getPriorityMask(const Function &F) const
virtual unsigned getMinTripCountTailFoldingThreshold() const
virtual TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const
virtual void collectKernelLaunchBounds(const Function &F, SmallVectorImpl< std::pair< StringRef, int64_t > > &LB) const
bool supportsTailCallFor(const CallBase *CB) const override
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const override
InstructionCost getInstructionCost(const User *U, ArrayRef< const Value * > Operands, TTI::TargetCostKind CostKind) const override
InstructionCost getPointersChainCost(ArrayRef< const Value * > Ptrs, const Value *Base, const TTI::PointersChainInfo &Info, Type *AccessTy, TTI::TargetCostKind CostKind) const override
InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value * > Operands, Type *AccessType, TTI::TargetCostKind CostKind) const override
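The long list above is the overridable hook surface of TargetTransformInfoImplBase. The sketch below is purely illustrative: "ExampleTTIImpl" is a hypothetical class, not an in-tree target, and real backends usually derive from BasicTTIImplBase and override far more hooks. It only shows the shape of plugging target answers into two of the virtual functions listed above.

#include "llvm/Analysis/TargetTransformInfoImpl.h"

namespace llvm {

// Hypothetical example target; the thresholds are invented for illustration.
class ExampleTTIImpl final : public TargetTransformInfoImplBase {
public:
  explicit ExampleTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplBase(DL) {}

  // Pretend the target can fold small immediates directly into adds.
  bool isLegalAddImmediate(int64_t Imm) const override {
    return Imm >= -4096 && Imm < 4096;
  }

  // Weight inlining more aggressively than the stock multiplier.
  unsigned getInliningThresholdMultiplier() const override { return 2; }
};

} // namespace llvm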
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
static LLVM_ABI CastContextHint getCastContextHint(const Instruction *I)
Calculates a CastContextHint from I.
VectorInstrContext
Represents a hint about the context in which an insert/extract is used.
@ None
The insert/extract is not used with a load/store.
MaskKind
Some targets only support masked load/store with a constant mask.
static LLVM_ABI OperandValueInfo getOperandInfo(const Value *V)
Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCK_Latency
The latency of an instruction.
PopcntSupportKind
Flags indicating the kind of support for population count.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
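A hedged sketch of querying a cost under a chosen TargetCostKind, using the getInstructionCost signature listed earlier in this page; the wrapper name is hypothetical, and TTI is assumed to come from a pass's TargetTransformInfo analysis result. The same instruction can be cheap by one cost kind and expensive by another, which is why the kind is an explicit parameter.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

static bool looksFreeForSize(const TargetTransformInfo &TTI,
                             const Instruction &I) {
  SmallVector<const Value *, 4> Ops(I.operand_values());
  // TCK_CodeSize asks "how big is this", TCK_RecipThroughput asks "how many
  // issue slots does it consume"; here we only care about size.
  InstructionCost SizeCost =
      TTI.getInstructionCost(&I, Ops, TargetTransformInfo::TCK_CodeSize);
  return SizeCost == TargetTransformInfo::TCC_Free;
}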
MemIndexedMode
The type of load/store indexing.
AddressingModeKind
Which addressing mode Loop Strength Reduction will try to generate.
@ AMK_None
Don't prefer any addressing mode.
static VectorInstrContext getVectorInstrContextHint(const Instruction *I)
Calculates a VectorInstrContext from I.
ShuffleKind
The various kinds of shuffle patterns for vector queries.
@ SK_InsertSubvector
InsertSubvector. Index indicates start offset.
@ SK_Select
Selects elements from the corresponding lane of either source operand.
@ SK_PermuteSingleSrc
Shuffle elements of single source vector with any shuffle mask.
@ SK_Transpose
Transpose two vectors.
@ SK_Splice
Concatenates elements from the first input vector with elements of the second input vector.
@ SK_Broadcast
Broadcast element 0 to all other elements.
@ SK_PermuteTwoSrc
Merge elements from two source vectors into one with any shuffle mask.
@ SK_Reverse
Reverse the order of the vector.
@ SK_ExtractSubvector
ExtractSubvector. Index indicates start offset.
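The shuffle kinds above are consumed by getShuffleCost, whose parameter list was shown earlier in this listing (and can shift between LLVM versions). A hedged sketch, with an invented helper name and arbitrary vector shape:

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Estimate what broadcasting lane 0 of a <4 x i32> costs on this target.
static InstructionCost broadcastCost(const TargetTransformInfo &TTI,
                                     LLVMContext &Ctx) {
  auto *VecTy = FixedVectorType::get(Type::getInt32Ty(Ctx), 4);
  // SK_Broadcast: replicate element 0 into every lane of the result.
  return TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast,
                            /*DstTy=*/VecTy, /*SrcTy=*/VecTy, /*Mask=*/{},
                            TargetTransformInfo::TCK_RecipThroughput,
                            /*Index=*/0, /*SubTp=*/nullptr);
}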
CastContextHint
Represents a hint about the context in which a cast is used.
CacheLevel
The possible cache levels.
This class represents a truncation of integer types.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:270
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
This is the common base class for vector predication intrinsics.
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:708
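A small sketch combining the Type and Value queries listed above: decide whether a value is a pointer (or vector of pointers) and recover its address space after stripping no-op casts. The helper name is hypothetical.

#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include <optional>

using namespace llvm;

static std::optional<unsigned> getPointerAddrSpace(const Value *V) {
  const Value *Stripped = V->stripPointerCasts(); // Drop casts/all-zero GEPs.
  Type *Ty = Stripped->getType();
  if (!Ty->isPtrOrPtrVectorTy())
    return std::nullopt;
  return Ty->getPointerAddressSpace(); // Valid for pointer vectors too.
}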
Base class of all SIMD vector types.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
bool match(Val *V, const Pattern &P)
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
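The PatternMatch combinators above compose into predicates over IR values, as in this hedged sketch; the predicate name is made up, and the zero-argument m_LogicalAnd()/m_LogicalOr() forms listed above are used so the operands are ignored.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// True if V is a logical AND or OR, including the select-based
// "a ? b : false" / "a ? true : b" forms.
static bool isLogicalAndOrOr(Value *V) {
  return match(V, m_CombineOr(m_LogicalAnd(), m_LogicalOr()));
}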
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Length
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
InstructionCost Cost
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B, C, ...), such that A is the 0-based index of the item in the sequence, and B, C, ... are the values from the original input ranges.
Definition STLExtras.h:2544
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
constexpr int PoisonMaskElem
RecurKind
These are the kinds of recurrences that we support.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
@ DataWithoutLaneMask
Same as Data, but avoids using the get.active.lane.mask intrinsic to calculate the mask and instead implements it with ordinary IR instructions.
InstructionUniformity
Enum describing how instructions behave with respect to uniformity and divergence.
Definition Uniformity.h:18
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Attributes of a target dependent hardware loop.
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:189
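A hedged sketch of the value-tracking entries above: compute the known bits of a value and narrow the tracked width. The helper name is hypothetical, DL is assumed to be the module's DataLayout, and V is assumed to have integer type.

#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/KnownBits.h"

using namespace llvm;

static KnownBits knownLow8Bits(const Value *V, const DataLayout &DL) {
  unsigned BitWidth = V->getType()->getScalarSizeInBits();
  KnownBits Known(BitWidth);
  computeKnownBits(V, Known, DL);  // Out-parameter form listed above.
  // Keep only what is known about the low 8 bits.
  return Known.anyextOrTrunc(8);
}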
Information about a load/store intrinsic defined by the target.
Returns options for expansion of memcmp. IsZeroCmp is true if this is the expansion of memcmp(p1, p2, s) == 0.
Describe known properties for a set of pointers.
Parameters that control the generic loop unrolling transformation.