TargetTransformInfo.h
1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This pass exposes codegen information to IR-level passes. Every
10/// transformation that uses codegen information is broken into three parts:
11/// 1. The IR-level analysis pass.
12/// 2. The IR-level transformation interface which provides the needed
13/// information.
14/// 3. Codegen-level implementation which uses target-specific hooks.
15///
16/// This file defines #2, which is the interface that IR-level transformations
17/// use for querying the codegen.
18///
19//===----------------------------------------------------------------------===//
20
21#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
22#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23
25#include "llvm/IR/FMF.h"
26#include "llvm/IR/InstrTypes.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
32#include <functional>
33#include <optional>
34#include <utility>
35
36namespace llvm {
37
38namespace Intrinsic {
39typedef unsigned ID;
40}
41
42class AssumptionCache;
44class DominatorTree;
45class BranchInst;
46class CallBase;
47class Function;
48class GlobalValue;
49class InstCombiner;
52class IntrinsicInst;
53class LoadInst;
54class Loop;
55class LoopInfo;
59class SCEV;
60class ScalarEvolution;
61class StoreInst;
62class SwitchInst;
64class Type;
65class User;
66class Value;
67class VPIntrinsic;
68struct KnownBits;
69
70/// Information about a load/store intrinsic defined by the target.
72 /// This is the pointer that the intrinsic is loading from or storing to.
73 /// If this is non-null, then analysis/optimization passes can assume that
74 /// this intrinsic is functionally equivalent to a load/store from this
75 /// pointer.
76 Value *PtrVal = nullptr;
77
78 // Ordering for atomic operations.
80 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
81 // Same Id is set by the target for corresponding load/store intrinsics.
82 unsigned short MatchingId = 0;
83
84 bool ReadMem = false;
85 bool WriteMem = false;
86 bool IsVolatile = false;
87
88 bool isUnordered() const {
89 return (Ordering == AtomicOrdering::NotAtomic ||
90 Ordering == AtomicOrdering::Unordered) &&
91 !IsVolatile;
92 }
93};
94
95/// Attributes of a target dependent hardware loop.
96struct HardwareLoopInfo {
97 HardwareLoopInfo() = delete;
98 HardwareLoopInfo(Loop *L) : L(L) {}
99 Loop *L = nullptr;
100 BasicBlock *ExitBlock = nullptr;
101 BranchInst *ExitBranch = nullptr;
102 const SCEV *ExitCount = nullptr;
103 IntegerType *CountType = nullptr;
104 Value *LoopDecrement = nullptr; // Decrement the loop counter by this
105 // value in every iteration.
106 bool IsNestingLegal = false; // Can a hardware loop be a parent to
107 // another hardware loop?
108 bool CounterInReg = false; // Should loop counter be updated in
109 // the loop via a phi?
110 bool PerformEntryTest = false; // Generate the intrinsic which also performs
111 // icmp ne zero on the loop counter value and
112 // produces an i1 to guard the loop entry.
113 bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
114 DominatorTree &DT, bool ForceNestedLoop = false,
115 bool ForceHardwareLoopPHI = false);
116 bool canAnalyze(LoopInfo &LI);
117};
118
119class IntrinsicCostAttributes {
120 const IntrinsicInst *II = nullptr;
121 Type *RetTy = nullptr;
122 Intrinsic::ID IID;
123 SmallVector<Type *, 4> ParamTys;
124 SmallVector<const Value *, 4> Arguments;
125 FastMathFlags FMF;
126 // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
127 // arguments and the return value will be computed based on types.
128 InstructionCost ScalarizationCost = InstructionCost::getInvalid();
129
130public:
132 Intrinsic::ID Id, const CallBase &CI,
134 bool TypeBasedOnly = false);
135
137 Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys,
138 FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr,
140
143
147 const IntrinsicInst *I = nullptr,
149
150 Intrinsic::ID getID() const { return IID; }
151 const IntrinsicInst *getInst() const { return II; }
152 Type *getReturnType() const { return RetTy; }
153 FastMathFlags getFlags() const { return FMF; }
154 InstructionCost getScalarizationCost() const { return ScalarizationCost; }
156 const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
157
158 bool isTypeBasedOnly() const {
159 return Arguments.empty();
160 }
161
162 bool skipScalarizationCost() const { return ScalarizationCost.isValid(); }
163};
164
165enum class TailFoldingStyle {
166 /// Don't use tail folding
167 None,
168 /// Use predicate only to mask operations on data in the loop.
169 /// When the VL is not known to be a power-of-2, this method requires a
170 /// runtime overflow check for the i + VL increment in the loop, because it
171 /// compares the scalar induction variable against the trip count rounded up
172 /// by VL, which may overflow. When the VL is a power-of-2, both the increment
173 /// and the rounded-up trip count overflow to 0, so no runtime check is
174 /// needed: the loop exits when the loop induction variable equals the
175 /// rounded-up trip count, i.e. when both are 0.
176 Data,
177 /// Same as Data, but avoids using the get.active.lane.mask intrinsic to
178 /// calculate the mask and instead implements this with a
179 /// splat/stepvector/cmp.
180 /// FIXME: Can this kind be removed now that SelectionDAGBuilder expands the
181 /// active.lane.mask intrinsic when it is not natively supported?
182 DataWithoutLaneMask,
183 /// Use predicate to control both data and control flow.
184 /// This method always requires a runtime overflow check for the i + VL
185 /// increment inside the loop, because it uses the result directly in the
186 /// active.lane.mask to calculate the mask for the next iteration. If the
187 /// increment overflows, the mask is no longer correct.
188 DataAndControlFlow,
189 /// Use predicate to control both data and control flow, but modify
190 /// the trip count so that a runtime overflow check can be avoided
191 /// and such that the scalar epilogue loop can always be removed.
192 DataAndControlFlowWithoutRuntimeCheck
193};
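 ///
 /// For illustration only, a rough IR sketch of the overflow concern described
 /// above (the values %iv, %vl and %tc are assumed to come from the
 /// surrounding vectorized loop; they are not defined in this file):
 /// \code
 ///   %next = add i32 %iv, %vl
 ///   %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 %next,
 ///                                                             i32 %tc)
 ///   ; With DataAndControlFlow the mask for the next iteration is built from
 ///   ; %next, so %next must not wrap -- hence the runtime overflow check.
 /// \endcode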
194
195struct TailFoldingInfo {
196 TargetLibraryInfo *TLI;
197 LoopVectorizationLegality *LVL;
198 InterleavedAccessInfo *IAI;
199 TailFoldingInfo(TargetLibraryInfo *TLI, LoopVectorizationLegality *LVL,
200 InterleavedAccessInfo *IAI)
201 : TLI(TLI), LVL(LVL), IAI(IAI) {}
202};
203
204class TargetTransformInfo;
205typedef TargetTransformInfo TTI;
206
207/// This pass provides access to the codegen interfaces that are needed
208/// for IR-level transformations.
209class TargetTransformInfo {
210public:
211 /// Construct a TTI object using a type implementing the \c Concept
212 /// API below.
213 ///
214 /// This is used by targets to construct a TTI wrapping their target-specific
215 /// implementation that encodes appropriate costs for their target.
216 template <typename T> TargetTransformInfo(T Impl);
217
218 /// Construct a baseline TTI object using a minimal implementation of
219 /// the \c Concept API below.
220 ///
221 /// The TTI implementation will reflect the information in the DataLayout
222 /// provided if non-null.
223 explicit TargetTransformInfo(const DataLayout &DL);
224
225 // Provide move semantics.
226 TargetTransformInfo(TargetTransformInfo &&Arg);
227 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
228
229 // We need to define the destructor out-of-line to define our sub-classes
230 // out-of-line.
231 ~TargetTransformInfo();
232
233 /// Handle the invalidation of this information.
234 ///
235 /// When used as a result of \c TargetIRAnalysis this method will be called
236 /// when the function this was computed for changes. When it returns false,
237 /// the information is preserved across those changes.
240 // FIXME: We should probably in some way ensure that the subtarget
241 // information for a function hasn't changed.
242 return false;
243 }
244
245 /// \name Generic Target Information
246 /// @{
247
248 /// The kind of cost model.
249 ///
250 /// There are several different cost models that can be customized by the
251 /// target. The normalization of each cost model may be target specific.
252 /// e.g. TCK_SizeAndLatency should be comparable to target thresholds such as
253 /// those derived from MCSchedModel::LoopMicroOpBufferSize etc.
254 enum TargetCostKind {
255 TCK_RecipThroughput, ///< Reciprocal throughput.
256 TCK_Latency, ///< The latency of the instruction.
257 TCK_CodeSize, ///< Instruction code size.
258 TCK_SizeAndLatency ///< The weighted sum of size and latency.
259 };
260
261 /// Underlying constants for 'cost' values in this interface.
262 ///
263 /// Many APIs in this interface return a cost. This enum defines the
264 /// fundamental values that should be used to interpret (and produce) those
265 /// costs. The costs are returned as an int rather than a member of this
266 /// enumeration because it is expected that the cost of one IR instruction
267 /// may have a multiplicative factor to it or otherwise won't fit directly
268 /// into the enum. Moreover, it is common to sum or average costs which works
269 /// better as simple integral values. Thus this enum only provides constants.
270 /// Also note that the returned costs are signed integers to make it natural
271 /// to add, subtract, and test with zero (a common boundary condition). It is
272 /// not expected that 2^32 is a realistic cost to be modeling at any point.
273 ///
274 /// Note that these costs should usually reflect the intersection of code-size
275 /// cost and execution cost. A free instruction is typically one that folds
276 /// into another instruction. For example, reg-to-reg moves can often be
277 /// skipped by renaming the registers in the CPU, but they still are encoded
278 /// and thus wouldn't be considered 'free' here.
279 enum TargetCostConstants {
280 TCC_Free = 0, ///< Expected to fold away in lowering.
281 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
282 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
283 };
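 ///
 /// As a purely illustrative sketch of how clients combine cost kinds and the
 /// constants above (the TargetTransformInfo instance `TTI` and instruction
 /// `I` are assumed to be provided by the calling pass):
 /// \code
 ///   InstructionCost Cost =
 ///       TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
 ///   bool IsCheap =
 ///       Cost.isValid() && Cost <= TargetTransformInfo::TCC_Basic;
 /// \endcode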
284
285 /// Estimate the cost of a GEP operation when lowered.
287 getGEPCost(Type *PointeeType, const Value *Ptr,
290
291 /// Describe known properties for a set of pointers.
292 struct PointersChainInfo {
293 /// All the GEPs in a set have the same base address.
294 unsigned IsSameBaseAddress : 1;
295 /// These properties are only valid if IsSameBaseAddress is set.
296 /// True if all pointers are separated by a unit stride.
297 unsigned IsUnitStride : 1;
298 /// True if the distance between any two neighbouring pointers is a known value.
299 unsigned IsKnownStride : 1;
300 unsigned Reserved : 29;
301
302 bool isSameBase() const { return IsSameBaseAddress; }
303 bool isUnitStride() const { return IsSameBaseAddress && IsUnitStride; }
304 bool isKnownStride() const { return IsSameBaseAddress && IsKnownStride; }
305
306 static PointersChainInfo getUnitStride() {
307 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/1,
308 /*IsKnownStride=*/1, 0};
309 }
310 static PointersChainInfo getKnownStride() {
311 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
312 /*IsKnownStride=*/1, 0};
313 }
314 static PointersChainInfo getUnknownStride() {
315 return {/*IsSameBaseAddress=*/1, /*IsUnitStride=*/0,
316 /*IsKnownStride=*/0, 0};
317 }
318 };
319 static_assert(sizeof(PointersChainInfo) == 4, "Was size increase justified?");
320
321 /// Estimate the cost of a chain of pointer operations (typically the pointer
322 /// operands of a chain of loads or stores within the same block) when lowered.
323 /// \p AccessTy is the type of the loads/stores that will ultimately use the
324 /// \p Ptrs.
327 const PointersChainInfo &Info, Type *AccessTy,
329
330 ) const;
331
332 /// \returns A value by which our inlining threshold should be multiplied.
333 /// This is primarily used to bump up the inlining threshold wholesale on
334 /// targets where calls are unusually expensive.
335 ///
336 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
337 /// individual classes of instructions would be better.
338 unsigned getInliningThresholdMultiplier() const;
339
340 /// \returns A value to be added to the inlining threshold.
341 unsigned adjustInliningThreshold(const CallBase *CB) const;
342
343 /// \returns Vector bonus in percent.
344 ///
345 /// Vector bonuses: We want to more aggressively inline vector-dense kernels
346 /// and apply this bonus based on the percentage of vector instructions. A
347 /// bonus is applied if the vector instructions exceed 50% and half that
348 /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
349 /// arbitrary and evolved over time by accident as much as because they are
350 /// principled bonuses.
351 /// FIXME: It would be nice to base the bonus values on something more
352 /// scientific. A target may have no bonus on vector instructions.
353 int getInlinerVectorBonusPercent() const;
354
355 /// \return the expected cost of a memcpy, which could e.g. depend on the
356 /// source/destination type and alignment and the number of bytes copied.
357 InstructionCost getMemcpyCost(const Instruction *I) const;
358
359 /// \return The estimated number of case clusters when lowering \p 'SI'.
360 /// \p JTSize Set a jump table size only when \p SI is suitable for a jump
361 /// table.
363 unsigned &JTSize,
365 BlockFrequencyInfo *BFI) const;
366
367 /// Estimate the cost of a given IR user when lowered.
368 ///
369 /// This can estimate the cost of either a ConstantExpr or Instruction when
370 /// lowered.
371 ///
372 /// \p Operands is a list of operands that may be the result of
373 /// transformations applied to the current operands. The number of operands
374 /// on the list must equal the number of operands the IR user currently has,
375 /// and their order on the list must match the order of the user's current
376 /// operands.
377 ///
378 /// The returned cost is defined in terms of \c TargetCostConstants, see its
379 /// comments for a detailed explanation of the cost values.
383
384 /// This is a helper function which calls the three-argument
385 /// getInstructionCost with \p Operands which are the current operands U has.
387 TargetCostKind CostKind) const {
388 SmallVector<const Value *, 4> Operands(U->operand_values());
390 }
391
392 /// If a branch or a select condition is skewed in one direction by more than
393 /// this factor, it is very likely to be predicted correctly.
394 BranchProbability getPredictableBranchThreshold() const;
395
396 /// Return true if branch divergence exists.
397 ///
398 /// Branch divergence has a significantly negative impact on GPU performance
399 /// when threads in the same wavefront take different paths due to conditional
400 /// branches.
401 bool hasBranchDivergence() const;
402
403 /// Returns whether V is a source of divergence.
404 ///
405 /// This function provides the target-dependent information for
406 /// the target-independent UniformityAnalysis.
407 bool isSourceOfDivergence(const Value *V) const;
408
409 // Returns true for the target-specific set of operations
410 // that produce a uniform result even when given
411 // non-uniform arguments.
412 bool isAlwaysUniform(const Value *V) const;
413
414 /// Query the target whether the specified address space cast from FromAS to
415 /// ToAS is valid.
416 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
417
418 /// Returns the address space ID for a target's 'flat' address space. Note
419 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
420 /// refers to as the generic address space. The flat address space is a
421 /// generic address space that can be used to access multiple segments of memory
422 /// with different address spaces. Access of a memory location through a
423 /// pointer with this address space is expected to be legal but slower
424 /// compared to the same memory location accessed through a pointer with a
425 /// different address space.
426 ///
427 /// This is for targets with different pointer representations which can
428 /// be converted with the addrspacecast instruction. If a pointer is converted
429 /// to this address space, optimizations should attempt to replace the access
430 /// with the source address space.
431 ///
432 /// \returns ~0u if the target does not have such a flat address space to
433 /// optimize away.
434 unsigned getFlatAddressSpace() const;
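 ///
 /// A brief usage sketch (illustrative only; `TTI` is assumed to be a
 /// TargetTransformInfo reference available to the calling pass):
 /// \code
 ///   unsigned FlatAS = TTI.getFlatAddressSpace();
 ///   bool CanInferAddrSpaces = FlatAS != ~0u;
 /// \endcode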
435
436 /// Return any intrinsic address operand indexes which may be rewritten if
437 /// they use a flat address space pointer.
438 ///
439 /// \returns true if the intrinsic was handled.
441 Intrinsic::ID IID) const;
442
443 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
444
445 /// Return true if globals in this address space can have initializers other
446 /// than `undef`.
448
449 unsigned getAssumedAddrSpace(const Value *V) const;
450
451 bool isSingleThreaded() const;
452
453 std::pair<const Value *, unsigned>
454 getPredicatedAddrSpace(const Value *V) const;
455
456 /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
457 /// NewV, which has a different address space. This should happen for every
458 /// operand index that collectFlatAddressOperands returned for the intrinsic.
459 /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
460 /// new value (which may be the original \p II with modified operands).
462 Value *NewV) const;
463
464 /// Test whether calls to a function lower to actual program function
465 /// calls.
466 ///
467 /// The idea is to test whether the program is likely to require a 'call'
468 /// instruction or equivalent in order to call the given function.
469 ///
470 /// FIXME: It's not clear that this is a good or useful query API. Clients
471 /// should probably move to simpler cost metrics using the above.
472 /// Alternatively, we could split the cost interface into distinct code-size
473 /// and execution-speed costs. This would allow modelling the core of this
474 /// query more accurately as a call is a single small instruction, but
475 /// incurs significant execution cost.
476 bool isLoweredToCall(const Function *F) const;
477
478 struct LSRCost {
479 /// TODO: Some of these could be merged. Also, a lexical ordering
480 /// isn't always optimal.
481 unsigned Insns;
482 unsigned NumRegs;
483 unsigned AddRecCost;
484 unsigned NumIVMuls;
485 unsigned NumBaseAdds;
486 unsigned ImmCost;
487 unsigned SetupCost;
488 unsigned ScaleCost;
489 };
490
491 /// Parameters that control the generic loop unrolling transformation.
493 /// The cost threshold for the unrolled loop. Should be relative to the
494 /// getInstructionCost values returned by this API, and the expectation is
495 /// that the unrolled loop's instructions when run through that interface
496 /// should not exceed this cost. However, this is only an estimate. Also,
497 /// specific loops may be unrolled even with a cost above this threshold if
498 /// deemed profitable. Set this to UINT_MAX to disable the loop body cost
499 /// restriction.
500 unsigned Threshold;
501 /// If complete unrolling will reduce the cost of the loop, we will boost
502 /// the Threshold by a certain percent to allow more aggressive complete
503 /// unrolling. This value provides the maximum boost percentage that we
504 /// can apply to Threshold (The value should be no less than 100).
505 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
506 /// MaxPercentThresholdBoost / 100)
507 /// E.g. if complete unrolling reduces the loop execution time by 50%
508 /// then we boost the threshold by the factor of 2x. If unrolling is not
509 /// expected to reduce the running time, then we do not increase the
510 /// threshold.
512 /// The cost threshold for the unrolled loop when optimizing for size (set
513 /// to UINT_MAX to disable).
515 /// The cost threshold for the unrolled loop, like Threshold, but used
516 /// for partial/runtime unrolling (set to UINT_MAX to disable).
518 /// The cost threshold for the unrolled loop when optimizing for size, like
519 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
520 /// UINT_MAX to disable).
522 /// A forced unrolling factor (the number of concatenated bodies of the
523 /// original loop in the unrolled loop body). When set to 0, the unrolling
524 /// transformation will select an unrolling factor based on the current cost
525 /// threshold and other factors.
526 unsigned Count;
527 /// Default unroll count for loops with run-time trip count.
529 // Set the maximum unrolling factor. The unrolling factor may be selected
530 // using the appropriate cost threshold, but may not exceed this number
531 // (set to UINT_MAX to disable). This does not apply in cases where the
532 // loop is being fully unrolled.
533 unsigned MaxCount;
534 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
535 /// applies even if full unrolling is selected. This allows a target to fall
536 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
538 // Represents number of instructions optimized when "back edge"
539 // becomes "fall through" in unrolled loop.
540 // For now we count a conditional branch on a backedge and a comparison
541 // feeding it.
542 unsigned BEInsns;
543 /// Allow partial unrolling (unrolling of loops to expand the size of the
544 /// loop body, not only to eliminate small constant-trip-count loops).
546 /// Allow runtime unrolling (unrolling of loops to expand the size of the
547 /// loop body even when the number of loop iterations is not known at
548 /// compile time).
550 /// Allow generation of a loop remainder (extra iterations after unroll).
552 /// Allow emitting expensive instructions (such as divisions) when computing
553 /// the trip count of a loop for runtime unrolling.
555 /// Apply loop unroll on any kind of loop
556 /// (mainly to loops that fail runtime unrolling).
557 bool Force;
558 /// Allow using trip count upper bound to unroll loops.
560 /// Allow unrolling of all the iterations of the runtime loop remainder.
562 /// Allow unroll and jam. Used to enable unroll and jam for the target.
564 /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
565 /// value above is used during unroll and jam for the outer loop size.
566 /// This value is used in the same manner to limit the size of the inner
567 /// loop.
569 /// Don't allow loop unrolling to simulate more than this number of
570 /// iterations when checking full unroll profitability
572 /// Don't disable runtime unroll for the loops which were vectorized.
574 };
575
576 /// Get target-customized preferences for the generic loop unrolling
577 /// transformation. The caller will initialize UP with the current
578 /// target-independent defaults.
581 OptimizationRemarkEmitter *ORE) const;
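 ///
 /// For illustration, a sketch of how a target-specific implementation might
 /// adjust the defaults (the class name MyTTIImpl and the concrete numbers are
 /// hypothetical, not part of this interface):
 /// \code
 ///   void MyTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
 ///                                           TTI::UnrollingPreferences &UP,
 ///                                           OptimizationRemarkEmitter *ORE) {
 ///     UP.Threshold = 300;   // Tolerate a larger unrolled loop body.
 ///     UP.Partial = true;    // Allow partial unrolling.
 ///     UP.Runtime = true;    // Allow runtime-trip-count unrolling.
 ///   }
 /// \endcode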
582
583 /// Query the target whether it would be profitable to convert the given loop
584 /// into a hardware loop.
587 HardwareLoopInfo &HWLoopInfo) const;
588
589 /// Query the target whether it would be preferred to create a predicated
590 /// vector loop, which can avoid the need to emit a scalar epilogue loop.
591 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const;
592
593 /// Query the target what the preferred style of tail folding is.
594 /// \param IVUpdateMayOverflow Tells whether it is known if the IV update
595 /// may (or will never) overflow for the suggested VF/UF in the given loop.
596 /// Targets can use this information to select a more optimal tail folding
597 /// style. The value conservatively defaults to true, such that no assumptions
598 /// are made on overflow.
600 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const;
601
602 // Parameters that control the loop peeling transformation
603 struct PeelingPreferences {
604 /// A forced peeling factor (the number of bodies of the original loop
605 /// that should be peeled off before the loop body). When set to 0,
606 /// a peeling factor is selected based on profile information and other factors.
607 unsigned PeelCount;
608 /// Allow peeling off loop iterations.
610 /// Allow peeling off loop iterations for loop nests.
612 /// Allow peeling based on profile. Used to enable peeling off all
613 /// iterations based on the provided profile.
614 /// If the value is true, the peeling cost model can decide to peel only
615 /// some iterations, and in this case it will set this to false.
617 };
618
619 /// Get target-customized preferences for the generic loop peeling
620 /// transformation. The caller will initialize \p PP with the current
621 /// target-independent defaults with information from \p L and \p SE.
623 PeelingPreferences &PP) const;
624
625 /// Targets can implement their own combinations for target-specific
626 /// intrinsics. This function will be called from the InstCombine pass every
627 /// time a target-specific intrinsic is encountered.
628 ///
629 /// \returns std::nullopt to not do anything target specific or a value that
630 /// will be returned from the InstCombiner. It is possible to stop further
631 /// processing of the intrinsic by returning nullptr.
632 std::optional<Instruction *> instCombineIntrinsic(InstCombiner & IC,
633 IntrinsicInst & II) const;
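 /// A sketch of what a target-side override could look like (the class name
 /// MyTTIImpl and the folded case are hypothetical; real targets implement
 /// this in their TTI implementation class):
 /// \code
 ///   std::optional<Instruction *>
 ///   MyTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
 ///     if (isa<UndefValue>(II.getArgOperand(0)))
 ///       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
 ///     return std::nullopt; // Nothing target-specific to do.
 ///   }
 /// \endcode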
634 /// Can be used to implement target-specific instruction combining.
635 /// \see instCombineIntrinsic
636 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
637 InstCombiner & IC, IntrinsicInst & II, APInt DemandedMask,
638 KnownBits & Known, bool &KnownBitsComputed) const;
639 /// Can be used to implement target-specific instruction combining.
640 /// \see instCombineIntrinsic
641 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
642 InstCombiner & IC, IntrinsicInst & II, APInt DemandedElts,
643 APInt & UndefElts, APInt & UndefElts2, APInt & UndefElts3,
644 std::function<void(Instruction *, unsigned, APInt, APInt &)>
645 SimplifyAndSetOp) const;
646 /// @}
647
648 /// \name Scalar Target Information
649 /// @{
650
651 /// Flags indicating the kind of support for population count.
652 ///
653 /// Compared to the SW implementation, HW support is supposed to
654 /// significantly boost the performance when the population is dense, and it
655 /// may or may not degrade performance if the population is sparse. HW
656 /// support is considered "Fast" if it can outperform, or is on par with,
657 /// the SW implementation when the population is sparse; otherwise, it is
658 /// considered "Slow".
659 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
660
661 /// Return true if the specified immediate is a legal add immediate, that
662 /// is, the target has add instructions which can add a register with the
663 /// immediate without having to materialize the immediate into a register.
664 bool isLegalAddImmediate(int64_t Imm) const;
665
666 /// Return true if the specified immediate is a legal icmp immediate,
667 /// that is, the target has icmp instructions which can compare a register
668 /// against the immediate without having to materialize the immediate into a
669 /// register.
670 bool isLegalICmpImmediate(int64_t Imm) const;
671
672 /// Return true if the addressing mode represented by AM is legal for
673 /// this target, for a load/store of the specified type.
674 /// The type may be VoidTy, in which case only return true if the addressing
675 /// mode is legal for a load/store of any legal type.
676 /// If target returns true in LSRWithInstrQueries(), I may be valid.
677 /// TODO: Handle pre/postinc as well.
678 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
679 bool HasBaseReg, int64_t Scale,
680 unsigned AddrSpace = 0,
681 Instruction *I = nullptr) const;
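 ///
 /// An illustrative query (a sketch; `TTI` and the LLVMContext `Ctx` are
 /// assumed to be available in the calling pass):
 /// \code
 ///   // Is "BaseReg + 4 * IndexReg" legal for an i32 load/store?
 ///   bool Legal = TTI.isLegalAddressingMode(Type::getInt32Ty(Ctx),
 ///                                          /*BaseGV=*/nullptr,
 ///                                          /*BaseOffset=*/0,
 ///                                          /*HasBaseReg=*/true,
 ///                                          /*Scale=*/4);
 /// \endcode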
682
683 /// Return true if LSR cost of C1 is lower than C2.
685 const TargetTransformInfo::LSRCost &C2) const;
686
687 /// Return true if LSR major cost is number of registers. Targets which
688 /// implement their own isLSRCostLess and unset number of registers as major
689 /// cost should return false, otherwise return true.
690 bool isNumRegsMajorCostOfLSR() const;
691
692 /// \returns true if LSR should not optimize a chain that includes \p I.
694
695 /// Return true if the target can fuse a compare and branch.
696 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
697 /// calculation for the instructions in a loop.
698 bool canMacroFuseCmp() const;
699
700 /// Return true if the target can save a compare for the loop count; for
701 /// example, a hardware loop saves a compare.
702 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
704 TargetLibraryInfo *LibInfo) const;
705
706 enum AddressingModeKind {
707 AMK_PreIndexed,
708 AMK_PostIndexed,
709 AMK_None
710 };
711
712 /// Return the preferred addressing mode LSR should make efforts to generate.
713 AddressingModeKind getPreferredAddressingMode(const Loop *L,
714 ScalarEvolution *SE) const;
715
716 /// Return true if the target supports masked store.
717 bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
718 /// Return true if the target supports masked load.
719 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
720
721 /// Return true if the target supports nontemporal store.
722 bool isLegalNTStore(Type *DataType, Align Alignment) const;
723 /// Return true if the target supports nontemporal load.
724 bool isLegalNTLoad(Type *DataType, Align Alignment) const;
725
726 /// \returns true if the target supports broadcasting a load to a vector of
727 /// type <NumElements x ElementTy>.
728 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const;
729
730 /// Return true if the target supports masked scatter.
731 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
732 /// Return true if the target supports masked gather.
733 bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
734 /// Return true if the target forces scalarizing of llvm.masked.gather
735 /// intrinsics.
736 bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const;
737 /// Return true if the target forces scalarizing of llvm.masked.scatter
738 /// intrinsics.
739 bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const;
740
741 /// Return true if the target supports masked compress store.
742 bool isLegalMaskedCompressStore(Type *DataType) const;
743 /// Return true if the target supports masked expand load.
744 bool isLegalMaskedExpandLoad(Type *DataType) const;
745
746 /// Return true if this is an alternating opcode pattern that can be lowered
747 /// to a single instruction on the target. In X86 this is for the addsub
748 /// to a single instruction on the target. In X86 this is for the addsub
749 /// instruction which corresponds to a Shuffle + Fadd + FSub pattern in IR.
750 /// This function expects two opcodes: \p Opcode0 and \p Opcode1, selected
751 /// by \p OpcodeMask. The mask contains one bit per lane and is a `0`
752 /// when \p Opcode0 is selected and `1` when \p Opcode1 is selected.
753 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
754 const SmallBitVector &OpcodeMask) const;
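 ///
 /// A small sketch of how the mask encodes the per-lane opcode choice
 /// (illustrative; `TTI` and `VecTy` are assumed to exist in the caller):
 /// \code
 ///   // <4 x float> addsub: even lanes use FAdd (bit 0), odd lanes FSub (bit 1).
 ///   SmallBitVector OpcodeMask(4);
 ///   OpcodeMask.set(1);
 ///   OpcodeMask.set(3);
 ///   bool Legal = TTI.isLegalAltInstr(VecTy, Instruction::FAdd,
 ///                                    Instruction::FSub, OpcodeMask);
 /// \endcode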
755
756 /// Return true if we should be enabling ordered reductions for the target.
757 bool enableOrderedReductions() const;
758
759 /// Return true if the target has a unified operation to calculate division
760 /// and remainder. If so, the additional implicit multiplication and
761 /// subtraction required to calculate a remainder from division are free. This
762 /// can enable more aggressive transformations for division and remainder than
763 /// would typically be allowed using throughput or size cost models.
764 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
765
766 /// Return true if the given instruction (assumed to be a memory access
767 /// instruction) has a volatile variant. If that's the case then we can avoid
768 /// addrspacecast to generic AS for volatile loads/stores. Default
769 /// implementation returns false, which prevents address space inference for
770 /// volatile loads/stores.
771 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
772
773 /// Return true if target doesn't mind addresses in vectors.
774 bool prefersVectorizedAddressing() const;
775
776 /// Return the cost of the scaling factor used in the addressing
777 /// mode represented by AM for this target, for a load/store
778 /// of the specified type.
779 /// If the AM is supported, the return value must be >= 0.
780 /// If the AM is not supported, it returns a negative value.
781 /// TODO: Handle pre/postinc as well.
783 int64_t BaseOffset, bool HasBaseReg,
784 int64_t Scale,
785 unsigned AddrSpace = 0) const;
786
787 /// Return true if the loop strength reduce pass should make
788 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
789 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
790 /// immediate offset and no index register.
791 bool LSRWithInstrQueries() const;
792
793 /// Return true if it's free to truncate a value of type Ty1 to type
794 /// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
795 /// by referencing its sub-register AX.
796 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
797
798 /// Return true if it is profitable to hoist instruction in the
799 /// then/else to before if.
800 bool isProfitableToHoist(Instruction *I) const;
801
802 bool useAA() const;
803
804 /// Return true if this type is legal.
805 bool isTypeLegal(Type *Ty) const;
806
807 /// Returns the estimated number of registers required to represent \p Ty.
808 unsigned getRegUsageForType(Type *Ty) const;
809
810 /// Return true if switches should be turned into lookup tables for the
811 /// target.
812 bool shouldBuildLookupTables() const;
813
814 /// Return true if switches should be turned into lookup tables
815 /// containing this constant value for the target.
817
818 /// Return true if lookup tables should be turned into relative lookup tables.
819 bool shouldBuildRelLookupTables() const;
820
821 /// Return true if the input function, which is cold at all call sites,
822 /// should use the coldcc calling convention.
823 bool useColdCCForColdCall(Function &F) const;
824
825 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
826 /// are set if the demanded result elements need to be inserted and/or
827 /// extracted from vectors.
829 const APInt &DemandedElts,
830 bool Insert, bool Extract,
832
833 /// Estimate the overhead of scalarizing an instruction's unique
834 /// non-constant operands. The (potentially vector) types to use for each
835 /// argument are passed via Tys.
840
841 /// If target has efficient vector element load/store instructions, it can
842 /// return true here so that insertion/extraction costs are not added to
843 /// the scalarization cost of a load/store.
845
846 /// If the target supports tail calls.
847 bool supportsTailCalls() const;
848
849 /// If target supports tail call on \p CB
850 bool supportsTailCallFor(const CallBase *CB) const;
851
852 /// Don't restrict interleaved unrolling to small loops.
853 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
854
855 /// Returns options for expansion of memcmp. IsZeroCmp is
856 // true if this is the expansion of memcmp(p1, p2, s) == 0.
858 // Return true if memcmp expansion is enabled.
859 operator bool() const { return MaxNumLoads > 0; }
860
861 // Maximum number of load operations.
862 unsigned MaxNumLoads = 0;
863
864 // The list of available load sizes (in bytes), sorted in decreasing order.
865 SmallVector<unsigned, 8> LoadSizes;
866
867 // For memcmp expansion when the memcmp result is only compared equal or
868 // not-equal to 0, allow up to this number of load pairs per block. As an
869 // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
870 // a0 = load2bytes &a[0]
871 // b0 = load2bytes &b[0]
872 // a2 = load1byte &a[2]
873 // b2 = load1byte &b[2]
874 // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
875 unsigned NumLoadsPerBlock = 1;
876
877 // Set to true to allow overlapping loads. For example, 7-byte compares can
878 // be done with two 4-byte compares instead of 4+2+1-byte compares. This
879 // requires all loads in LoadSizes to be doable in an unaligned way.
880 bool AllowOverlappingLoads = false;
881 };
882 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
883 bool IsZeroCmp) const;
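 ///
 /// For illustration, a sketch of a target-side hook returning these options
 /// (the class name MyTTIImpl and the concrete numbers are hypothetical):
 /// \code
 ///   TTI::MemCmpExpansionOptions
 ///   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
 ///     TTI::MemCmpExpansionOptions Options;
 ///     Options.MaxNumLoads = OptSize ? 2 : 4;
 ///     Options.LoadSizes.append({8, 4, 2, 1}); // Prefer wide loads first.
 ///     Options.NumLoadsPerBlock = IsZeroCmp ? 2 : 1;
 ///     return Options;
 ///   }
 /// \endcode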
884
885 /// Should the Select Optimization pass be enabled and run.
886 bool enableSelectOptimize() const;
887
888 /// Enable matching of interleaved access groups.
890
891 /// Enable matching of interleaved access groups that contain predicated
892 /// accesses or gaps and therefore vectorized using masked
893 /// vector loads/stores.
895
896 /// Indicate that it is potentially unsafe to automatically vectorize
897 /// floating-point operations because the semantics of vector and scalar
898 /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
899 /// does not support IEEE-754 denormal numbers, while depending on the
900 /// platform, scalar floating-point math does.
901 /// This applies to floating-point math operations and calls, not memory
902 /// operations, shuffles, or casts.
904
905 /// Determine if the target supports unaligned memory accesses.
907 unsigned AddressSpace = 0,
908 Align Alignment = Align(1),
909 unsigned *Fast = nullptr) const;
910
911 /// Return hardware support for population count.
912 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
913
914 /// Return true if the hardware has a fast square-root instruction.
915 bool haveFastSqrt(Type *Ty) const;
916
917 /// Return true if the cost of the instruction is too high to speculatively
918 /// execute and should be kept behind a branch.
919 /// This normally just wraps around a getInstructionCost() call, but some
920 /// targets might report a low TCK_SizeAndLatency value that is incompatible
921 /// with the fixed TCC_Expensive value.
922 /// NOTE: This assumes the instruction passes isSafeToSpeculativelyExecute().
924
925 /// Return true if it is faster to check if a floating-point value is NaN
926 /// (or not-NaN) versus a comparison against a constant FP zero value.
927 /// Targets should override this if materializing a 0.0 for comparison is
928 /// generally as cheap as checking for ordered/unordered.
929 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
930
931 /// Return the expected cost of supporting the floating point operation
932 /// of the specified type.
934
935 /// Return the expected cost of materializing for the given integer
936 /// immediate of the specified type.
937 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
939
940 /// Return the expected cost of materialization for the given integer
941 /// immediate of the specified type for a given instruction. The cost can be
942 /// zero if the immediate can be folded into the specified instruction.
943 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
944 const APInt &Imm, Type *Ty,
946 Instruction *Inst = nullptr) const;
948 const APInt &Imm, Type *Ty,
950
951 /// Return the expected cost for the given integer when optimising
952 /// for size. This is different than the other integer immediate cost
953 /// functions in that it is subtarget agnostic. This is useful when you e.g.
954 /// target one ISA such as Aarch32 but smaller encodings could be possible
955 /// with another such as Thumb. This return value is used as a penalty when
956 /// the total cost of a constant is calculated (the bigger the cost, the
957 /// more beneficial constant hoisting is).
958 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
959 const APInt &Imm, Type *Ty) const;
960 /// @}
961
962 /// \name Vector Target Information
963 /// @{
964
965 /// The various kinds of shuffle patterns for vector queries.
967 SK_Broadcast, ///< Broadcast element 0 to all other elements.
968 SK_Reverse, ///< Reverse the order of the vector.
969 SK_Select, ///< Selects elements from the corresponding lane of
970 ///< either source operand. This is equivalent to a
971 ///< vector select with a constant condition operand.
972 SK_Transpose, ///< Transpose two vectors.
973 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
974 SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
975 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
976 ///< with any shuffle mask.
977 SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any
978 ///< shuffle mask.
979 SK_Splice ///< Concatenates elements from the first input vector
980 ///< with elements of the second input vector. Returning
981 ///< a vector of the same type as the input vectors.
982 ///< Index indicates start offset in first input vector.
983 };
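 ///
 /// An illustrative shuffle-cost query (a sketch; `TTI` and the LLVMContext
 /// `Ctx` are assumed to be available in the caller):
 /// \code
 ///   auto *VTy = FixedVectorType::get(Type::getInt16Ty(Ctx), 8);
 ///   InstructionCost C =
 ///       TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VTy);
 /// \endcode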
984
985 /// Additional information about an operand's possible values.
987 OK_AnyValue, // Operand can have any value.
988 OK_UniformValue, // Operand is uniform (splat of a value).
989 OK_UniformConstantValue, // Operand is uniform constant.
990 OK_NonUniformConstantValue // Operand is a non-uniform constant value.
991 };
992
993 /// Additional properties of an operand's values.
998 };
999
1000 // Describe the values an operand can take. We're in the process
1001 // of migrating uses of OperandValueKind and OperandValueProperties
1002 // to use this class, and then will change the internal representation.
1006
1007 bool isConstant() const {
1009 }
1010 bool isUniform() const {
1012 }
1013 bool isPowerOf2() const {
1014 return Properties == OP_PowerOf2;
1015 }
1016 bool isNegatedPowerOf2() const {
1018 }
1019
1021 return {Kind, OP_None};
1022 }
1023 };
1024
1025 /// \return the number of registers in the target-provided register class.
1026 unsigned getNumberOfRegisters(unsigned ClassID) const;
1027
1028 /// \return the target-provided register class ID for the provided type,
1029 /// accounting for type promotion and other type-legalization techniques that
1030 /// the target might apply. However, it specifically does not account for the
1031 /// scalarization or splitting of vector types. Should a vector type require
1032 /// scalarization or splitting into multiple underlying vector registers, that
1033 /// type should be mapped to a register class containing no registers.
1034 /// Specifically, this is designed to provide a simple, high-level view of the
1035 /// register allocation later performed by the backend. These register classes
1036 /// don't necessarily map onto the register classes used by the backend.
1037 /// FIXME: It's not currently possible to determine how many registers
1038 /// are used by the provided type.
1039 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
1040
1041 /// \return the target-provided register class name
1042 const char *getRegisterClassName(unsigned ClassID) const;
1043
1045
1046 /// \return The width of the largest scalar or vector register type.
1048
1049 /// \return The width of the smallest vector register type.
1050 unsigned getMinVectorRegisterBitWidth() const;
1051
1052 /// \return The maximum value of vscale if the target specifies an
1053 /// architectural maximum vector length, and std::nullopt otherwise.
1054 std::optional<unsigned> getMaxVScale() const;
1055
1056 /// \return the value of vscale to tune the cost model for.
1057 std::optional<unsigned> getVScaleForTuning() const;
1058
1059 /// \return true if vscale is known to be a power of 2
1060 bool isVScaleKnownToBeAPowerOfTwo() const;
1061
1062 /// \return True if the vectorization factor should be chosen to
1063 /// make the vector of the smallest element type match the size of a
1064 /// vector register. For wider element types, this could result in
1065 /// creating vectors that span multiple vector registers.
1066 /// If false, the vectorization factor will be chosen based on the
1067 /// size of the widest element type.
1068 /// \p K Register Kind for vectorization.
1070
1071 /// \return The minimum vectorization factor for types of given element
1072 /// bit width, or 0 if there is no minimum VF. The returned value only
1073 /// applies when shouldMaximizeVectorBandwidth returns true.
1074 /// If IsScalable is true, the returned ElementCount must be a scalable VF.
1075 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const;
1076
1077 /// \return The maximum vectorization factor for types of given element
1078 /// bit width and opcode, or 0 if there is no maximum VF.
1079 /// Currently only used by the SLP vectorizer.
1080 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
1081
1082 /// \return The minimum vectorization factor for the store instruction. Given
1083 /// the initial estimation of the minimum vector factor and store value type,
1084 /// it tries to find possible lowest VF, which still might be profitable for
1085 /// the vectorization.
1086 /// \param VF Initial estimation of the minimum vector factor.
1087 /// \param ScalarMemTy Scalar memory type of the store operation.
1088 /// \param ScalarValTy Scalar type of the stored value.
1089 /// Currently only used by the SLP vectorizer.
1090 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1091 Type *ScalarValTy) const;
1092
1093 /// \return True if it should be considered for address type promotion.
1094 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
1095 /// profitable without finding other extensions fed by the same input.
1097 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
1098
1099 /// \return The size of a cache line in bytes.
1100 unsigned getCacheLineSize() const;
1101
1102 /// The possible cache levels
1103 enum class CacheLevel {
1104 L1D, // The L1 data cache
1105 L2D, // The L2 data cache
1106
1107 // We currently do not model L3 caches, as their sizes differ widely between
1108 // microarchitectures. Also, we currently do not have a use for L3 cache
1109 // size modeling yet.
1110 };
1111
1112 /// \return The size of the cache level in bytes, if available.
1113 std::optional<unsigned> getCacheSize(CacheLevel Level) const;
1114
1115 /// \return The associativity of the cache level, if available.
1116 std::optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
1117
1118 /// \return How much before a load we should place the prefetch
1119 /// instruction. This is currently measured in number of
1120 /// instructions.
1121 unsigned getPrefetchDistance() const;
1122
1123 /// Some HW prefetchers can handle accesses up to a certain constant stride.
1124 /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
1125 /// and the arguments provided are meant to serve as a basis for deciding this
1126 /// for a particular loop.
1127 ///
1128 /// \param NumMemAccesses Number of memory accesses in the loop.
1129 /// \param NumStridedMemAccesses Number of the memory accesses that
1130 /// ScalarEvolution could find a known stride
1131 /// for.
1132 /// \param NumPrefetches Number of software prefetches that will be
1133 /// emitted as determined by the addresses
1134 /// involved and the cache line size.
1135 /// \param HasCall True if the loop contains a call.
1136 ///
1137 /// \return This is the minimum stride in bytes where it makes sense to start
1138 /// adding SW prefetches. The default is 1, i.e. prefetch with any
1139 /// stride.
1140 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1141 unsigned NumStridedMemAccesses,
1142 unsigned NumPrefetches, bool HasCall) const;
1143
1144 /// \return The maximum number of iterations to prefetch ahead. If
1145 /// the required number of iterations is more than this number, no
1146 /// prefetching is performed.
1147 unsigned getMaxPrefetchIterationsAhead() const;
1148
1149 /// \return True if prefetching should also be done for writes.
1150 bool enableWritePrefetching() const;
1151
1152 /// \return true if the target wants to issue a prefetch in address space \p AS.
1153 bool shouldPrefetchAddressSpace(unsigned AS) const;
1154
1155 /// \return The maximum interleave factor that any transform should try to
1156 /// perform for this target. This number depends on the level of parallelism
1157 /// and the number of execution units in the CPU.
1158 unsigned getMaxInterleaveFactor(ElementCount VF) const;
1159
1160 /// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
1161 static OperandValueInfo getOperandInfo(const Value *V);
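 ///
 /// For illustration, a sketch of classifying a binary operator's operands
 /// before costing it (`TTI`, `BinOp` and `CostKind` are assumed to be
 /// provided by the calling pass):
 /// \code
 ///   TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(BinOp->getOperand(0));
 ///   TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(BinOp->getOperand(1));
 ///   InstructionCost C = TTI.getArithmeticInstrCost(
 ///       BinOp->getOpcode(), BinOp->getType(), CostKind, Op1Info, Op2Info);
 /// \endcode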
1162
1163 /// This is an approximation of reciprocal throughput of a math/logic op.
1164 /// A higher cost indicates less expected throughput.
1165 /// From Agner Fog's guides, reciprocal throughput is "the average number of
1166 /// clock cycles per instruction when the instructions are not part of a
1167 /// limiting dependency chain."
1168 /// Therefore, costs should be scaled to account for multiple execution units
1169 /// on the target that can process this type of instruction. For example, if
1170 /// there are 5 scalar integer units and 2 vector integer units that can
1171 /// calculate an 'add' in a single cycle, this model should indicate that the
1172 /// cost of the vector add instruction is 2.5 times the cost of the scalar
1173 /// add instruction.
1174 /// \p Args is an optional argument which holds the instruction operands
1175 /// values so the TTI can analyze those values searching for special
1176 /// cases or optimizations based on those values.
1177 /// \p CxtI is the optional original context instruction, if one exists, to
1178 /// provide even more information.
1180 unsigned Opcode, Type *Ty,
1183 TTI::OperandValueInfo Opd2Info = {TTI::OK_AnyValue, TTI::OP_None},
1184 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
1185 const Instruction *CxtI = nullptr) const;
1186
1187 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
1188 /// The exact mask may be passed as Mask, or else the array will be empty.
1189 /// The index and subtype parameters are used by the subvector insertion and
1190 /// extraction shuffle kinds to show the insert/extract point and the type of
1191 /// the subvector being inserted/extracted. The operands of the shuffle can be
1192 /// passed through \p Args, which helps improve the cost estimation in some
1193 /// cases, like in broadcast loads.
1194 /// NOTE: For subvector extractions Tp represents the source type.
1195 InstructionCost
1197 ArrayRef<int> Mask = std::nullopt,
1199 int Index = 0, VectorType *SubTp = nullptr,
1200 ArrayRef<const Value *> Args = std::nullopt) const;
1201
1202 /// Represents a hint about the context in which a cast is used.
1203 ///
1204 /// For zext/sext, the context of the cast is the operand, which must be a
1205 /// load of some kind. For trunc, the context of the cast is the single
1206 /// user of the instruction, which must be a store of some kind.
1207 ///
1208 /// This enum allows the vectorizer to give getCastInstrCost an idea of the
1209 /// type of cast it's dealing with, as not every cast is equal. For instance,
1210 /// the zext of a load may be free, but the zext of an interleaving load can
1211 /// be (very) expensive!
1212 ///
1213 /// See \c getCastContextHint to compute a CastContextHint from a cast
1214 /// Instruction*. Callers can use it if they don't need to override the
1215 /// context and just want it to be calculated from the instruction.
1216 ///
1217 /// FIXME: This handles the types of load/store that the vectorizer can
1218 /// produce, which are the cases where the context instruction is most
1219 /// likely to be incorrect. There are other situations where that can happen
1220 /// too, which might be handled here but in the long run a more general
1221 /// solution of costing multiple instructions at the same time may be better.
1222 enum class CastContextHint : uint8_t {
1223 None, ///< The cast is not used with a load/store of any kind.
1224 Normal, ///< The cast is used with a normal load/store.
1225 Masked, ///< The cast is used with a masked load/store.
1226 GatherScatter, ///< The cast is used with a gather/scatter.
1227 Interleave, ///< The cast is used with an interleaved load/store.
1228 Reversed, ///< The cast is used with a reversed load/store.
1229 };
1230
1231 /// Calculates a CastContextHint from \p I.
1232 /// This should be used by callers of getCastInstrCost if they wish to
1233 /// determine the context from some instruction.
1234 /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
1235 /// or if it's another type of cast.
1237
1238 /// \return The expected cost of cast instructions, such as bitcast, trunc,
1239 /// zext, etc. If there is an existing instruction that holds Opcode, it
1240 /// may be passed in the 'I' parameter.
1242 getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1245 const Instruction *I = nullptr) const;
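 ///
 /// An illustrative use together with \c getCastContextHint (a sketch; `TTI`,
 /// `Cast` (a CastInst*) and `CostKind` are assumed to come from the caller):
 /// \code
 ///   TTI::CastContextHint CCH = TTI::getCastContextHint(Cast);
 ///   InstructionCost C =
 ///       TTI.getCastInstrCost(Cast->getOpcode(), Cast->getType(),
 ///                            Cast->getSrcTy(), CCH, CostKind, Cast);
 /// \endcode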
1246
1247 /// \return The expected cost of a sign- or zero-extended vector extract. Use
1248 /// Index = -1 to indicate that there is no information about the index value.
1249 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1250 VectorType *VecTy,
1251 unsigned Index) const;
1252
1253 /// \return The expected cost of control-flow related instructions such as
1254 /// Phi, Ret, Br, Switch.
1256 getCFInstrCost(unsigned Opcode,
1258 const Instruction *I = nullptr) const;
1259
1260 /// \returns The expected cost of compare and select instructions. If there
1261 /// is an existing instruction that holds Opcode, it may be passed in the
1262 /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
1263 /// is using a compare with the specified predicate as condition. When vector
1264 /// types are passed, \p VecPred must be used for all lanes.
1266 getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
1267 CmpInst::Predicate VecPred,
1269 const Instruction *I = nullptr) const;
1270
1271 /// \return The expected cost of vector Insert and Extract.
1272 /// Use -1 to indicate that there is no information on the index value.
1273 /// This is used when the instruction is not available; a typical use
1274 /// case is to provision the cost of vectorization/scalarization in
1275 /// vectorizer passes.
1276 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1278 unsigned Index = -1, Value *Op0 = nullptr,
1279 Value *Op1 = nullptr) const;
1280
1281 /// \return The expected cost of vector Insert and Extract.
1282 /// This is used when instruction is available, and implementation
1283 /// asserts 'I' is not nullptr.
1284 ///
1285 /// A typical suitable use case is cost estimation when vector instruction
1286 /// exists (e.g., from basic blocks during transformation).
1289 unsigned Index = -1) const;
1290
1291 /// \return The cost of replication shuffle of \p VF elements typed \p EltTy
1292 /// \p ReplicationFactor times.
1293 ///
1294 /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is:
1295 /// <0,0,0,1,1,1,2,2,2,3,3,3>
1296 InstructionCost getReplicationShuffleCost(Type *EltTy, int ReplicationFactor,
1297 int VF,
1298 const APInt &DemandedDstElts,
1300
1301 /// \return The cost of Load and Store instructions.
1303 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1304 unsigned AddressSpace,
1306 OperandValueInfo OpdInfo = {OK_AnyValue, OP_None},
1307 const Instruction *I = nullptr) const;
1308
1309 /// \return The cost of VP Load and Store instructions.
1310 InstructionCost
1311 getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1312 unsigned AddressSpace,
1314 const Instruction *I = nullptr) const;
1315
1316 /// \return The cost of masked Load and Store instructions.
1318 unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
1320
1321 /// \return The cost of Gather or Scatter operation
1322 /// \p Opcode - is a type of memory access Load or Store
1323 /// \p DataTy - a vector type of the data to be loaded or stored
1324 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
1325 /// \p VariableMask - true when the memory access is predicated with a mask
1326 /// that is not a compile-time constant
1327 /// \p Alignment - alignment of single element
1328 /// \p I - the optional original context instruction, if one exists, e.g. the
1329 /// load/store to transform or the call to the gather/scatter intrinsic
1331 unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
1333 const Instruction *I = nullptr) const;
1334
1335 /// \return The cost of the interleaved memory operation.
1336 /// \p Opcode is the memory operation code
1337 /// \p VecTy is the vector type of the interleaved access.
1338 /// \p Factor is the interleave factor
1339 /// \p Indices is the indices for interleaved load members (as interleaved
1340 /// load allows gaps)
1341 /// \p Alignment is the alignment of the memory operation
1342 /// \p AddressSpace is address space of the pointer.
1343 /// \p UseMaskForCond indicates if the memory access is predicated.
1344 /// \p UseMaskForGaps indicates if gaps should be masked.
1346 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1347 Align Alignment, unsigned AddressSpace,
1349 bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
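 ///
 /// An illustrative query for a stride-2 interleaved load where both members
 /// are used (a sketch; `TTI`, `WideVecTy` and `CostKind` are assumed to be
 /// provided by the caller):
 /// \code
 ///   InstructionCost C = TTI.getInterleavedMemoryOpCost(
 ///       Instruction::Load, WideVecTy, /*Factor=*/2, /*Indices=*/{0, 1},
 ///       Align(4), /*AddressSpace=*/0, CostKind);
 /// \endcode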
1350
1351 /// A helper function to determine the type of reduction algorithm used
1352 /// for a given \p Opcode and set of FastMathFlags \p FMF.
1353 static bool requiresOrderedReduction(std::optional<FastMathFlags> FMF) {
1354 return FMF && !(*FMF).allowReassoc();
1355 }
1356
1357 /// Calculate the cost of vector reduction intrinsics.
1358 ///
1359 /// This is the cost of reducing the vector value of type \p Ty to a scalar
1360 /// value using the operation denoted by \p Opcode. The FastMathFlags
1361 /// parameter \p FMF indicates what type of reduction we are performing:
1362 /// 1. Tree-wise. This is the typical 'fast' reduction performed that
1363 /// involves successively splitting a vector into half and doing the
1364 /// operation on the pair of halves until you have a scalar value. For
1365 /// example:
1366 /// (v0, v1, v2, v3)
1367 /// ((v0+v2), (v1+v3), undef, undef)
1368 /// ((v0+v2+v1+v3), undef, undef, undef)
1369 /// This is the default behaviour for integer operations, whereas for
1370 /// floating point we only do this if \p FMF indicates that
1371 /// reassociation is allowed.
1372 /// 2. Ordered. For a vector with N elements this involves performing N
1373 /// operations in lane order, starting with an initial scalar value, i.e.
1374 /// result = InitVal + v0
1375 /// result = result + v1
1376 /// result = result + v2
1377 /// result = result + v3
1378 /// This is only the case for FP operations and when reassociation is not
1379 /// allowed.
1380 ///
1381 InstructionCost getArithmeticReductionCost(
1382 unsigned Opcode, VectorType *Ty, std::optional<FastMathFlags> FMF,
1383 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1384
1385 InstructionCost getMinMaxReductionCost(
1386 VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1387 FastMathFlags FMF = FastMathFlags(),
1388 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1389
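To make the two strategies described above concrete, here is a small scalar model of each. This is plain C++, not part of LLVM, and the tree form assumes a non-empty, power-of-two element count.

#include <cstddef>
#include <vector>

// Ordered: result = (((Init + v0) + v1) + v2) + v3, preserving FP order.
static float orderedReduce(const std::vector<float> &V, float Init) {
  float Result = Init;
  for (float X : V)
    Result += X;
  return Result;
}

// Tree-wise: repeatedly fold the upper half into the lower half, e.g.
// (v0,v1,v2,v3) -> (v0+v2, v1+v3) -> (v0+v2+v1+v3).
static float treeReduce(std::vector<float> V) {
  for (size_t Half = V.size() / 2; Half >= 1; Half /= 2)
    for (size_t I = 0; I != Half; ++I)
      V[I] += V[I + Half];
  return V.front();
}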
1390 /// Calculate the cost of an extended reduction pattern, similar to
1391 /// getArithmeticReductionCost of an Add reduction with multiply and optional
1392 /// extensions. This is the cost of:
1393 /// ResTy vecreduce.add(mul (A, B)).
1394 /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))).
1395 InstructionCost getMulAccReductionCost(
1396 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1397 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1398
1399 /// Calculate the cost of an extended reduction pattern, similar to
1400 /// getArithmeticReductionCost of a reduction with an extension.
1401 /// This is the cost of:
1402 /// ResTy vecreduce.opcode(ext(Ty A)).
1403 InstructionCost getExtendedReductionCost(
1404 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1405 FastMathFlags FMF,
1406 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
1407
1408 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
1409 /// Three cases are handled: 1. scalar instruction 2. vector instruction
1410 /// 3. scalar instruction which is to be vectorized.
1411 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1412 TTI::TargetCostKind CostKind) const;
1413
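A small sketch of the type-based query path; the choice of llvm.fmuladd and the helper name are assumptions for illustration, and only the IntrinsicCostAttributes construction and the getIntrinsicInstrCost call are real API.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

// Hypothetical helper: type-based cost of an fmuladd intrinsic on type Ty.
static InstructionCost fmuladdCost(const TargetTransformInfo &TTI, Type *Ty) {
  IntrinsicCostAttributes Attrs(Intrinsic::fmuladd, Ty, {Ty, Ty, Ty});
  return TTI.getIntrinsicInstrCost(Attrs,
                                   TargetTransformInfo::TCK_RecipThroughput);
}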
1414 /// \returns The cost of Call instructions.
1415 InstructionCost getCallInstrCost(
1416 Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1417 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
1418
1419 /// \returns The number of pieces into which the provided type must be
1420 /// split during legalization. Zero is returned when the answer is unknown.
1421 unsigned getNumberOfParts(Type *Tp) const;
1422
1423 /// \returns The cost of the address computation. For most targets this can be
1424 /// merged into the instruction indexing mode. Some targets might want to
1425 /// distinguish between address computation for memory operations on vector
1426 /// types and scalar types. Such targets should override this function.
1427 /// The 'SE' parameter holds a pointer to the scalar evolution object, which
1428 /// is used to get the step value of 'Ptr' in the case of a constant stride.
1429 /// The 'Ptr' parameter holds the SCEV of the access pointer.
1430 InstructionCost getAddressComputationCost(Type *Ty,
1431 ScalarEvolution *SE = nullptr,
1432 const SCEV *Ptr = nullptr) const;
1433
1434 /// \returns The cost, if any, of keeping values of the given types alive
1435 /// over a callsite.
1436 ///
1437 /// Some types may require the use of register classes that do not have
1438 /// any callee-saved registers, so would require a spill and fill.
1439 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
1440
1441 /// \returns True if the intrinsic is a supported memory intrinsic. Info
1442 /// will contain additional information - whether the intrinsic may write
1443 /// or read to memory, volatility and the pointer. Info is undefined
1444 /// if false is returned.
1445 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
1446
1447 /// \returns The maximum element size, in bytes, for an element
1448 /// unordered-atomic memory intrinsic.
1449 unsigned getAtomicMemIntrinsicMaxElementSize() const;
1450
1451 /// \returns A value which is the result of the given memory intrinsic. New
1452 /// instructions may be created to extract the result from the given intrinsic
1453 /// memory operation. Returns nullptr if the target cannot create a result
1454 /// from the given intrinsic.
1455 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1456 Type *ExpectedType) const;
1457
1458 /// \returns The type to use in a loop expansion of a memcpy call.
1459 Type *getMemcpyLoopLoweringType(
1460 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1461 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1462 std::optional<uint32_t> AtomicElementSize = std::nullopt) const;
1463
1464 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
1465 /// \param RemainingBytes The number of bytes to copy.
1466 ///
1467 /// Calculates the operand types to use when copying \p RemainingBytes of
1468 /// memory, where source and destination alignments are \p SrcAlign and
1469 /// \p DestAlign respectively.
1470 void getMemcpyLoopResidualLoweringType(
1471 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1472 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1473 unsigned SrcAlign, unsigned DestAlign,
1474 std::optional<uint32_t> AtomicCpySize = std::nullopt) const;
1475
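A sketch of how a memcpy-expansion style pass could combine the two hooks above: pick the per-iteration type for the main copy loop, then let the residual hook choose the types for the trailing bytes. The address spaces, alignments and the helper itself are assumptions; only the two TTI calls are real.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical planning step: no IR is emitted here, we only collect types.
static void planMemcpyLowering(const TargetTransformInfo &TTI,
                               LLVMContext &Ctx, Value *Length,
                               unsigned RemainingBytes, Type *&LoopOpTy,
                               SmallVectorImpl<Type *> &ResidualTypes) {
  LoopOpTy = TTI.getMemcpyLoopLoweringType(Ctx, Length, /*SrcAddrSpace=*/0,
                                           /*DestAddrSpace=*/0,
                                           /*SrcAlign=*/16, /*DestAlign=*/16);
  TTI.getMemcpyLoopResidualLoweringType(ResidualTypes, Ctx, RemainingBytes,
                                        /*SrcAddrSpace=*/0,
                                        /*DestAddrSpace=*/0,
                                        /*SrcAlign=*/16, /*DestAlign=*/16);
}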
1476 /// \returns True if the two functions have compatible attributes for inlining
1477 /// purposes.
1478 bool areInlineCompatible(const Function *Caller,
1479 const Function *Callee) const;
1480
1481 /// \returns True if the caller and callee agree on how \p Types will be
1482 /// passed to or returned from the callee.
1484 /// \param Types List of types to check.
1485 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
1486 const ArrayRef<Type *> &Types) const;
1487
1488 /// The type of load/store indexing.
1489 enum MemIndexedMode {
1490 MIM_Unindexed, ///< No indexing.
1491 MIM_PreInc, ///< Pre-incrementing.
1492 MIM_PreDec, ///< Pre-decrementing.
1493 MIM_PostInc, ///< Post-incrementing.
1494 MIM_PostDec ///< Post-decrementing.
1495 };
1496
1497 /// \returns True if the specified indexed load for the given type is legal.
1498 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
1499
1500 /// \returns True if the specified indexed store for the given type is legal.
1501 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
1502
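For example, a pass that wants to form a post-incrementing load would first check legality for the loaded type; the helper below is illustrative and only the isIndexedLoadLegal call is real API.

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical check before rewriting an access into post-increment form.
static bool canUsePostIncLoad(const TargetTransformInfo &TTI, Type *Ty) {
  return TTI.isIndexedLoadLegal(TargetTransformInfo::MIM_PostInc, Ty);
}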
1503 /// \returns The bitwidth of the largest vector type that should be used to
1504 /// load/store in the given address space.
1505 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
1506
1507 /// \returns True if the load instruction is legal to vectorize.
1508 bool isLegalToVectorizeLoad(LoadInst *LI) const;
1509
1510 /// \returns True if the store instruction is legal to vectorize.
1511 bool isLegalToVectorizeStore(StoreInst *SI) const;
1512
1513 /// \returns True if it is legal to vectorize the given load chain.
1514 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
1515 unsigned AddrSpace) const;
1516
1517 /// \returns True if it is legal to vectorize the given store chain.
1518 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
1519 unsigned AddrSpace) const;
1520
1521 /// \returns True if it is legal to vectorize the given reduction kind.
1522 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
1523 ElementCount VF) const;
1524
1525 /// \returns True if the given type is supported for scalable vectors
1526 bool isElementTypeLegalForScalableVector(Type *Ty) const;
1527
1528 /// \returns The new vector factor value if the target doesn't support \p
1529 /// SizeInBytes loads or has a better vector factor.
1530 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1531 unsigned ChainSizeInBytes,
1532 VectorType *VecTy) const;
1533
1534 /// \returns The new vector factor value if the target doesn't support \p
1535 /// SizeInBytes stores or has a better vector factor.
1536 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1537 unsigned ChainSizeInBytes,
1538 VectorType *VecTy) const;
1539
1540 /// Flags describing the kind of vector reduction.
1541 struct ReductionFlags {
1542 ReductionFlags() = default;
1543 bool IsMaxOp =
1544 false; ///< If the op is a min/max kind, true if it's a max operation.
1545 bool IsSigned = false; ///< Whether the operation is a signed int reduction.
1546 bool NoNaN =
1547 false; ///< If op is an fp min/max, whether NaNs may be present.
1548 };
1549
1550 /// \returns True if the target prefers reductions in loop.
1551 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1552 ReductionFlags Flags) const;
1553
1554 /// \returns True if the target prefers the reduction select to be kept in
1555 /// the loop when tail folding, i.e.
1556 /// loop:
1557 /// p = phi (0, s)
1558 /// a = add (p, x)
1559 /// s = select (mask, a, p)
1560 /// vecreduce.add(s)
1561 ///
1562 /// As opposed to the normal scheme of p = phi (0, a) which allows the select
1563 /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
1564 /// by the target, this can lead to cleaner code generation.
1565 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1566 ReductionFlags Flags) const;
1567
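A sketch of the query a vectorizer might make before committing to the in-loop select form shown above; the integer add opcode and the default-constructed flags are assumptions for the example.

#include "llvm/Analysis/TargetTransformInfo.h"
using namespace llvm;

// Hypothetical decision helper for an integer add reduction.
static bool keepSelectInLoop(const TargetTransformInfo &TTI, Type *RdxTy) {
  TargetTransformInfo::ReductionFlags Flags; // not min/max, not signed
  return TTI.preferPredicatedReductionSelect(Instruction::Add, RdxTy, Flags);
}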
1568 /// Return true if the loop vectorizer should consider vectorizing an
1569 /// otherwise scalar epilogue loop.
1570 bool preferEpilogueVectorization() const;
1571
1572 /// \returns True if the target wants to expand the given reduction intrinsic
1573 /// into a shuffle sequence.
1574 bool shouldExpandReduction(const IntrinsicInst *II) const;
1575
1576 /// \returns the size cost of rematerializing a GlobalValue address relative
1577 /// to a stack reload.
1578 unsigned getGISelRematGlobalCost() const;
1579
1580 /// \returns the lower bound of a trip count to decide on vectorization
1581 /// while tail-folding.
1582 unsigned getMinTripCountTailFoldingThreshold() const;
1583
1584 /// \returns True if the target supports scalable vectors.
1585 bool supportsScalableVectors() const;
1586
1587 /// \return true when scalable vectorization is preferred.
1588 bool enableScalableVectorization() const;
1589
1590 /// \name Vector Predication Information
1591 /// @{
1592 /// Whether the target supports the %evl parameter of VP intrinsic efficiently
1593 /// in hardware, for the given opcode and type/alignment. (see LLVM Language
1594 /// Reference - "Vector Predication Intrinsics").
1595 /// Use of %evl is discouraged when that is not the case.
1596 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
1597 Align Alignment) const;
1598
1599 struct VPLegalization {
1600 enum VPTransform {
1601 // keep the predicating parameter
1602 Legal = 0,
1603 // where legal, discard the predicate parameter
1604 Discard = 1,
1605 // transform into something else that is also predicating
1606 Convert = 2
1607 };
1608
1609 // How to transform the EVL parameter.
1610 // Legal: keep the EVL parameter as it is.
1611 // Discard: Ignore the EVL parameter where it is safe to do so.
1612 // Convert: Fold the EVL into the mask parameter.
1613 VPTransform EVLParamStrategy;
1614
1615 // How to transform the operator.
1616 // Legal: The target supports this operator.
1617 // Convert: Convert this to a non-VP operation.
1618 // The 'Discard' strategy is invalid.
1619 VPTransform OpStrategy;
1620
1621 bool shouldDoNothing() const {
1622 return (EVLParamStrategy == Legal) && (OpStrategy == Legal);
1623 }
1624 VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy)
1625 : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {}
1626 };
1627
1628 /// \returns How the target needs this vector-predicated operation to be
1629 /// transformed.
1630 VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const;
1631 /// @}
1632
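A sketch of how an expansion-style pass could branch on the returned strategy; only getVPLegalizationStrategy and shouldDoNothing are real API here, and the surrounding helper is an assumption.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/IntrinsicInst.h"
using namespace llvm;

// Hypothetical check: does this VP intrinsic need any legalization at all?
static bool needsVPExpansion(const TargetTransformInfo &TTI,
                             const VPIntrinsic &VPI) {
  TargetTransformInfo::VPLegalization Strategy =
      TTI.getVPLegalizationStrategy(VPI);
  return !Strategy.shouldDoNothing();
}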
1633 /// \returns Whether a 32-bit branch instruction is available in Arm or Thumb
1634 /// state.
1635 ///
1636 /// Used by the LowerTypeTests pass, which constructs an IR inline assembler
1637 /// node containing a jump table in a format suitable for the target, so it
1638 /// needs to know what format of jump table it can legally use.
1639 ///
1640 /// For non-Arm targets, this function isn't used. It defaults to returning
1641 /// false, but it shouldn't matter what it returns anyway.
1642 bool hasArmWideBranch(bool Thumb) const;
1643
1644 /// \return The maximum number of function arguments the target supports.
1645 unsigned getMaxNumArgs() const;
1646
1647 /// @}
1648
1649private:
1650 /// The abstract base class used to type erase specific TTI
1651 /// implementations.
1652 class Concept;
1653
1654 /// The template model for the base class which wraps a concrete
1655 /// implementation in a type erased interface.
1656 template <typename T> class Model;
1657
1658 std::unique_ptr<Concept> TTIImpl;
1659};
1660
1661class TargetTransformInfo::Concept {
1662public:
1663 virtual ~Concept() = 0;
1664 virtual const DataLayout &getDataLayout() const = 0;
1665 virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr,
1668 virtual InstructionCost
1670 const TTI::PointersChainInfo &Info, Type *AccessTy,
1672 virtual unsigned getInliningThresholdMultiplier() const = 0;
1673 virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0;
1674 virtual int getInlinerVectorBonusPercent() const = 0;
1676 virtual unsigned
1678 ProfileSummaryInfo *PSI,
1679 BlockFrequencyInfo *BFI) = 0;
1684 virtual bool hasBranchDivergence() = 0;
1685 virtual bool isSourceOfDivergence(const Value *V) = 0;
1686 virtual bool isAlwaysUniform(const Value *V) = 0;
1687 virtual bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1688 virtual unsigned getFlatAddressSpace() = 0;
1690 Intrinsic::ID IID) const = 0;
1691 virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
1692 virtual bool
1694 virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
1695 virtual bool isSingleThreaded() const = 0;
1696 virtual std::pair<const Value *, unsigned>
1697 getPredicatedAddrSpace(const Value *V) const = 0;
1699 Value *OldV,
1700 Value *NewV) const = 0;
1701 virtual bool isLoweredToCall(const Function *F) = 0;
1704 OptimizationRemarkEmitter *ORE) = 0;
1706 PeelingPreferences &PP) = 0;
1708 AssumptionCache &AC,
1709 TargetLibraryInfo *LibInfo,
1710 HardwareLoopInfo &HWLoopInfo) = 0;
1712 virtual TailFoldingStyle
1713 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) = 0;
1714 virtual std::optional<Instruction *> instCombineIntrinsic(
1715 InstCombiner &IC, IntrinsicInst &II) = 0;
1716 virtual std::optional<Value *> simplifyDemandedUseBitsIntrinsic(
1717 InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask,
1718 KnownBits & Known, bool &KnownBitsComputed) = 0;
1719 virtual std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
1720 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
1721 APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
1722 std::function<void(Instruction *, unsigned, APInt, APInt &)>
1723 SimplifyAndSetOp) = 0;
1724 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1725 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1726 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1727 int64_t BaseOffset, bool HasBaseReg,
1728 int64_t Scale, unsigned AddrSpace,
1729 Instruction *I) = 0;
1731 const TargetTransformInfo::LSRCost &C2) = 0;
1732 virtual bool isNumRegsMajorCostOfLSR() = 0;
1734 virtual bool canMacroFuseCmp() = 0;
1735 virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
1737 TargetLibraryInfo *LibInfo) = 0;
1738 virtual AddressingModeKind
1740 virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
1741 virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
1742 virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
1743 virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
1744 virtual bool isLegalBroadcastLoad(Type *ElementTy,
1745 ElementCount NumElements) const = 0;
1746 virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
1747 virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
1749 Align Alignment) = 0;
1751 Align Alignment) = 0;
1752 virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
1753 virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
1754 virtual bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0,
1755 unsigned Opcode1,
1756 const SmallBitVector &OpcodeMask) const = 0;
1757 virtual bool enableOrderedReductions() = 0;
1758 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1759 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1762 int64_t BaseOffset,
1763 bool HasBaseReg, int64_t Scale,
1764 unsigned AddrSpace) = 0;
1765 virtual bool LSRWithInstrQueries() = 0;
1766 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1768 virtual bool useAA() = 0;
1769 virtual bool isTypeLegal(Type *Ty) = 0;
1770 virtual unsigned getRegUsageForType(Type *Ty) = 0;
1771 virtual bool shouldBuildLookupTables() = 0;
1773 virtual bool shouldBuildRelLookupTables() = 0;
1774 virtual bool useColdCCForColdCall(Function &F) = 0;
1776 const APInt &DemandedElts,
1777 bool Insert, bool Extract,
1779 virtual InstructionCost
1781 ArrayRef<Type *> Tys,
1784 virtual bool supportsTailCalls() = 0;
1785 virtual bool supportsTailCallFor(const CallBase *CB) = 0;
1786 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1788 enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
1789 virtual bool enableSelectOptimize() = 0;
1794 unsigned BitWidth,
1795 unsigned AddressSpace,
1796 Align Alignment,
1797 unsigned *Fast) = 0;
1798 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1799 virtual bool haveFastSqrt(Type *Ty) = 0;
1801 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1803 virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
1804 const APInt &Imm, Type *Ty) = 0;
1805 virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
1807 virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
1808 const APInt &Imm, Type *Ty,
1810 Instruction *Inst = nullptr) = 0;
1812 const APInt &Imm, Type *Ty,
1814 virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
1815 virtual unsigned getRegisterClassForType(bool Vector,
1816 Type *Ty = nullptr) const = 0;
1817 virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
1819 virtual unsigned getMinVectorRegisterBitWidth() const = 0;
1820 virtual std::optional<unsigned> getMaxVScale() const = 0;
1821 virtual std::optional<unsigned> getVScaleForTuning() const = 0;
1822 virtual bool isVScaleKnownToBeAPowerOfTwo() const = 0;
1823 virtual bool
1825 virtual ElementCount getMinimumVF(unsigned ElemWidth,
1826 bool IsScalable) const = 0;
1827 virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
1828 virtual unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
1829 Type *ScalarValTy) const = 0;
1831 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1832 virtual unsigned getCacheLineSize() const = 0;
1833 virtual std::optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
1834 virtual std::optional<unsigned> getCacheAssociativity(CacheLevel Level)
1835 const = 0;
1836
1837 /// \return How much before a load we should place the prefetch
1838 /// instruction. This is currently measured in number of
1839 /// instructions.
1840 virtual unsigned getPrefetchDistance() const = 0;
1841
1842 /// \return Some HW prefetchers can handle accesses up to a certain
1843 /// constant stride. This is the minimum stride in bytes where it
1844 /// makes sense to start adding SW prefetches. The default is 1,
1845 /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
1846 /// even below the HW prefetcher limit, and the arguments provided are
1847 /// meant to serve as a basis for deciding this for a particular loop.
1848 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
1849 unsigned NumStridedMemAccesses,
1850 unsigned NumPrefetches,
1851 bool HasCall) const = 0;
1852
1853 /// \return The maximum number of iterations to prefetch ahead. If
1854 /// the required number of iterations is more than this number, no
1855 /// prefetching is performed.
1856 virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
1857
1858 /// \return True if prefetching should also be done for writes.
1859 virtual bool enableWritePrefetching() const = 0;
1860
1861 /// \return True if the target wants to issue a prefetch in address space \p AS.
1862 virtual bool shouldPrefetchAddressSpace(unsigned AS) const = 0;
1863
1864 virtual unsigned getMaxInterleaveFactor(ElementCount VF) = 0;
1866 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
1867 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
1868 ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0;
1869
1871 ArrayRef<int> Mask,
1873 int Index, VectorType *SubTp,
1874 ArrayRef<const Value *> Args) = 0;
1875 virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst,
1876 Type *Src, CastContextHint CCH,
1878 const Instruction *I) = 0;
1879 virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1880 VectorType *VecTy,
1881 unsigned Index) = 0;
1882 virtual InstructionCost getCFInstrCost(unsigned Opcode,
1884 const Instruction *I = nullptr) = 0;
1885 virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1886 Type *CondTy,
1887 CmpInst::Predicate VecPred,
1889 const Instruction *I) = 0;
1890 virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
1892 unsigned Index, Value *Op0,
1893 Value *Op1) = 0;
1896 unsigned Index) = 0;
1897
1898 virtual InstructionCost
1899 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
1900 const APInt &DemandedDstElts,
1902
1903 virtual InstructionCost
1904 getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1906 OperandValueInfo OpInfo, const Instruction *I) = 0;
1907 virtual InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src,
1908 Align Alignment,
1909 unsigned AddressSpace,
1911 const Instruction *I) = 0;
1912 virtual InstructionCost
1913 getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
1914 unsigned AddressSpace,
1916 virtual InstructionCost
1917 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
1918 bool VariableMask, Align Alignment,
1920 const Instruction *I = nullptr) = 0;
1921
1923 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
1924 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
1925 bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
1926 virtual InstructionCost
1928 std::optional<FastMathFlags> FMF,
1930 virtual InstructionCost
1931 getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
1934 unsigned Opcode, bool IsUnsigned, Type *ResTy, VectorType *Ty,
1935 FastMathFlags FMF,
1938 bool IsUnsigned, Type *ResTy, VectorType *Ty,
1940 virtual InstructionCost
1944 ArrayRef<Type *> Tys,
1946 virtual unsigned getNumberOfParts(Type *Tp) = 0;
1947 virtual InstructionCost
1949 virtual InstructionCost
1952 MemIntrinsicInfo &Info) = 0;
1953 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1955 Type *ExpectedType) = 0;
1957 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
1958 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
1959 std::optional<uint32_t> AtomicElementSize) const = 0;
1960
1962 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1963 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
1964 unsigned SrcAlign, unsigned DestAlign,
1965 std::optional<uint32_t> AtomicCpySize) const = 0;
1966 virtual bool areInlineCompatible(const Function *Caller,
1967 const Function *Callee) const = 0;
1968 virtual bool areTypesABICompatible(const Function *Caller,
1969 const Function *Callee,
1970 const ArrayRef<Type *> &Types) const = 0;
1971 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1972 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1973 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1974 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1975 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1976 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1977 Align Alignment,
1978 unsigned AddrSpace) const = 0;
1979 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1980 Align Alignment,
1981 unsigned AddrSpace) const = 0;
1983 ElementCount VF) const = 0;
1984 virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
1985 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1986 unsigned ChainSizeInBytes,
1987 VectorType *VecTy) const = 0;
1988 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1989 unsigned ChainSizeInBytes,
1990 VectorType *VecTy) const = 0;
1991 virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
1992 ReductionFlags) const = 0;
1993 virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
1994 ReductionFlags) const = 0;
1995 virtual bool preferEpilogueVectorization() const = 0;
1996
1997 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1998 virtual unsigned getGISelRematGlobalCost() const = 0;
1999 virtual unsigned getMinTripCountTailFoldingThreshold() const = 0;
2000 virtual bool enableScalableVectorization() const = 0;
2001 virtual bool supportsScalableVectors() const = 0;
2002 virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2003 Align Alignment) const = 0;
2004 virtual VPLegalization
2006 virtual bool hasArmWideBranch(bool Thumb) const = 0;
2007 virtual unsigned getMaxNumArgs() const = 0;
2008};
2009
2010template <typename T>
2011class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
2012 T Impl;
2013
2014public:
2015 Model(T Impl) : Impl(std::move(Impl)) {}
2016 ~Model() override = default;
2017
2018 const DataLayout &getDataLayout() const override {
2019 return Impl.getDataLayout();
2020 }
2021
2022 InstructionCost
2023 getGEPCost(Type *PointeeType, const Value *Ptr,
2024 ArrayRef<const Value *> Operands,
2026 return Impl.getGEPCost(PointeeType, Ptr, Operands, CostKind);
2027 }
2028 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs,
2029 const Value *Base,
2030 const PointersChainInfo &Info,
2031 Type *AccessTy,
2032 TargetCostKind CostKind) override {
2033 return Impl.getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
2034 }
2035 unsigned getInliningThresholdMultiplier() const override {
2036 return Impl.getInliningThresholdMultiplier();
2037 }
2038 unsigned adjustInliningThreshold(const CallBase *CB) override {
2039 return Impl.adjustInliningThreshold(CB);
2040 }
2041 int getInlinerVectorBonusPercent() const override {
2042 return Impl.getInlinerVectorBonusPercent();
2043 }
2044 InstructionCost getMemcpyCost(const Instruction *I) override {
2045 return Impl.getMemcpyCost(I);
2046 }
2047 InstructionCost getInstructionCost(const User *U,
2048 ArrayRef<const Value *> Operands,
2049 TargetCostKind CostKind) override {
2050 return Impl.getInstructionCost(U, Operands, CostKind);
2051 }
2052 BranchProbability getPredictableBranchThreshold() override {
2053 return Impl.getPredictableBranchThreshold();
2054 }
2055 bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
2056 bool isSourceOfDivergence(const Value *V) override {
2057 return Impl.isSourceOfDivergence(V);
2058 }
2059
2060 bool isAlwaysUniform(const Value *V) override {
2061 return Impl.isAlwaysUniform(V);
2062 }
2063
2064 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2065 return Impl.isValidAddrSpaceCast(FromAS, ToAS);
2066 }
2067
2068 unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
2069
2070 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
2071 Intrinsic::ID IID) const override {
2072 return Impl.collectFlatAddressOperands(OpIndexes, IID);
2073 }
2074
2075 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
2076 return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
2077 }
2078
2079 bool
2080 canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const override {
2081 return Impl.canHaveNonUndefGlobalInitializerInAddressSpace(AS);
2082 }
2083
2084 unsigned getAssumedAddrSpace(const Value *V) const override {
2085 return Impl.getAssumedAddrSpace(V);
2086 }
2087
2088 bool isSingleThreaded() const override { return Impl.isSingleThreaded(); }
2089
2090 std::pair<const Value *, unsigned>
2091 getPredicatedAddrSpace(const Value *V) const override {
2092 return Impl.getPredicatedAddrSpace(V);
2093 }
2094
2095 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
2096 Value *NewV) const override {
2097 return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
2098 }
2099
2100 bool isLoweredToCall(const Function *F) override {
2101 return Impl.isLoweredToCall(F);
2102 }
2103 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
2104 UnrollingPreferences &UP,
2105 OptimizationRemarkEmitter *ORE) override {
2106 return Impl.getUnrollingPreferences(L, SE, UP, ORE);
2107 }
2108 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
2109 PeelingPreferences &PP) override {
2110 return Impl.getPeelingPreferences(L, SE, PP);
2111 }
2112 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
2113 AssumptionCache &AC, TargetLibraryInfo *LibInfo,
2114 HardwareLoopInfo &HWLoopInfo) override {
2115 return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
2116 }
2117 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) override {
2118 return Impl.preferPredicateOverEpilogue(TFI);
2119 }
2121 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) override {
2122 return Impl.getPreferredTailFoldingStyle(IVUpdateMayOverflow);
2123 }
2124 std::optional<Instruction *>
2125 instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) override {
2126 return Impl.instCombineIntrinsic(IC, II);
2127 }
2128 std::optional<Value *>
2129 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
2130 APInt DemandedMask, KnownBits &Known,
2131 bool &KnownBitsComputed) override {
2132 return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
2133 KnownBitsComputed);
2134 }
2135 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic(
2136 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
2137 APInt &UndefElts2, APInt &UndefElts3,
2138 std::function<void(Instruction *, unsigned, APInt, APInt &)>
2139 SimplifyAndSetOp) override {
2140 return Impl.simplifyDemandedVectorEltsIntrinsic(
2141 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
2142 SimplifyAndSetOp);
2143 }
2144 bool isLegalAddImmediate(int64_t Imm) override {
2145 return Impl.isLegalAddImmediate(Imm);
2146 }
2147 bool isLegalICmpImmediate(int64_t Imm) override {
2148 return Impl.isLegalICmpImmediate(Imm);
2149 }
2150 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
2151 bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
2152 Instruction *I) override {
2153 return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2154 AddrSpace, I);
2155 }
2156 bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
2157 const TargetTransformInfo::LSRCost &C2) override {
2158 return Impl.isLSRCostLess(C1, C2);
2159 }
2160 bool isNumRegsMajorCostOfLSR() override {
2161 return Impl.isNumRegsMajorCostOfLSR();
2162 }
2163 bool isProfitableLSRChainElement(Instruction *I) override {
2164 return Impl.isProfitableLSRChainElement(I);
2165 }
2166 bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
2167 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
2168 DominatorTree *DT, AssumptionCache *AC,
2169 TargetLibraryInfo *LibInfo) override {
2170 return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
2171 }
2173 getPreferredAddressingMode(const Loop *L,
2174 ScalarEvolution *SE) const override {
2175 return Impl.getPreferredAddressingMode(L, SE);
2176 }
2177 bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
2178 return Impl.isLegalMaskedStore(DataType, Alignment);
2179 }
2180 bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
2181 return Impl.isLegalMaskedLoad(DataType, Alignment);
2182 }
2183 bool isLegalNTStore(Type *DataType, Align Alignment) override {
2184 return Impl.isLegalNTStore(DataType, Alignment);
2185 }
2186 bool isLegalNTLoad(Type *DataType, Align Alignment) override {
2187 return Impl.isLegalNTLoad(DataType, Alignment);
2188 }
2189 bool isLegalBroadcastLoad(Type *ElementTy,
2190 ElementCount NumElements) const override {
2191 return Impl.isLegalBroadcastLoad(ElementTy, NumElements);
2192 }
2193 bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
2194 return Impl.isLegalMaskedScatter(DataType, Alignment);
2195 }
2196 bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
2197 return Impl.isLegalMaskedGather(DataType, Alignment);
2198 }
2199 bool forceScalarizeMaskedGather(VectorType *DataType,
2200 Align Alignment) override {
2201 return Impl.forceScalarizeMaskedGather(DataType, Alignment);
2202 }
2203 bool forceScalarizeMaskedScatter(VectorType *DataType,
2204 Align Alignment) override {
2205 return Impl.forceScalarizeMaskedScatter(DataType, Alignment);
2206 }
2207 bool isLegalMaskedCompressStore(Type *DataType) override {
2208 return Impl.isLegalMaskedCompressStore(DataType);
2209 }
2210 bool isLegalMaskedExpandLoad(Type *DataType) override {
2211 return Impl.isLegalMaskedExpandLoad(DataType);
2212 }
2213 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1,
2214 const SmallBitVector &OpcodeMask) const override {
2215 return Impl.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask);
2216 }
2217 bool enableOrderedReductions() override {
2218 return Impl.enableOrderedReductions();
2219 }
2220 bool hasDivRemOp(Type *DataType, bool IsSigned) override {
2221 return Impl.hasDivRemOp(DataType, IsSigned);
2222 }
2223 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
2224 return Impl.hasVolatileVariant(I, AddrSpace);
2225 }
2226 bool prefersVectorizedAddressing() override {
2227 return Impl.prefersVectorizedAddressing();
2228 }
2229 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
2230 int64_t BaseOffset, bool HasBaseReg,
2231 int64_t Scale,
2232 unsigned AddrSpace) override {
2233 return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
2234 AddrSpace);
2235 }
2236 bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
2237 bool isTruncateFree(Type *Ty1, Type *Ty2) override {
2238 return Impl.isTruncateFree(Ty1, Ty2);
2239 }
2240 bool isProfitableToHoist(Instruction *I) override {
2241 return Impl.isProfitableToHoist(I);
2242 }
2243 bool useAA() override { return Impl.useAA(); }
2244 bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
2245 unsigned getRegUsageForType(Type *Ty) override {
2246 return Impl.getRegUsageForType(Ty);
2247 }
2248 bool shouldBuildLookupTables() override {
2249 return Impl.shouldBuildLookupTables();
2250 }
2251 bool shouldBuildLookupTablesForConstant(Constant *C) override {
2252 return Impl.shouldBuildLookupTablesForConstant(C);
2253 }
2254 bool shouldBuildRelLookupTables() override {
2255 return Impl.shouldBuildRelLookupTables();
2256 }
2257 bool useColdCCForColdCall(Function &F) override {
2258 return Impl.useColdCCForColdCall(F);
2259 }
2260
2261 InstructionCost getScalarizationOverhead(VectorType *Ty,
2262 const APInt &DemandedElts,
2263 bool Insert, bool Extract,
2264 TargetCostKind CostKind) override {
2265 return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
2266 CostKind);
2267 }
2268 InstructionCost
2269 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
2270 ArrayRef<Type *> Tys,
2271 TargetCostKind CostKind) override {
2272 return Impl.getOperandsScalarizationOverhead(Args, Tys, CostKind);
2273 }
2274
2275 bool supportsEfficientVectorElementLoadStore() override {
2276 return Impl.supportsEfficientVectorElementLoadStore();
2277 }
2278
2279 bool supportsTailCalls() override { return Impl.supportsTailCalls(); }
2280 bool supportsTailCallFor(const CallBase *CB) override {
2281 return Impl.supportsTailCallFor(CB);
2282 }
2283
2284 bool enableAggressiveInterleaving(bool LoopHasReductions) override {
2285 return Impl.enableAggressiveInterleaving(LoopHasReductions);
2286 }
2287 MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
2288 bool IsZeroCmp) const override {
2289 return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
2290 }
2291 bool enableInterleavedAccessVectorization() override {
2292 return Impl.enableInterleavedAccessVectorization();
2293 }
2294 bool enableSelectOptimize() override {
2295 return Impl.enableSelectOptimize();
2296 }
2297 bool enableMaskedInterleavedAccessVectorization() override {
2298 return Impl.enableMaskedInterleavedAccessVectorization();
2299 }
2300 bool isFPVectorizationPotentiallyUnsafe() override {
2301 return Impl.isFPVectorizationPotentiallyUnsafe();
2302 }
2303 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
2304 unsigned AddressSpace, Align Alignment,
2305 unsigned *Fast) override {
2306 return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
2307 Alignment, Fast);
2308 }
2309 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
2310 return Impl.getPopcntSupport(IntTyWidthInBit);
2311 }
2312 bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }
2313
2314 bool isExpensiveToSpeculativelyExecute(const Instruction* I) override {
2315 return Impl.isExpensiveToSpeculativelyExecute(I);
2316 }
2317
2318 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
2319 return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
2320 }
2321
2322 InstructionCost getFPOpCost(Type *Ty) override {
2323 return Impl.getFPOpCost(Ty);
2324 }
2325
2326 InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
2327 const APInt &Imm, Type *Ty) override {
2328 return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
2329 }
2330 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty,
2331 TargetCostKind CostKind) override {
2332 return Impl.getIntImmCost(Imm, Ty, CostKind);
2333 }
2334 InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx,
2335 const APInt &Imm, Type *Ty,
2337 Instruction *Inst = nullptr) override {
2338 return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
2339 }
2340 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
2341 const APInt &Imm, Type *Ty,
2342 TargetCostKind CostKind) override {
2343 return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
2344 }
2345 unsigned getNumberOfRegisters(unsigned ClassID) const override {
2346 return Impl.getNumberOfRegisters(ClassID);
2347 }
2348 unsigned getRegisterClassForType(bool Vector,
2349 Type *Ty = nullptr) const override {
2350 return Impl.getRegisterClassForType(Vector, Ty);
2351 }
2352 const char *getRegisterClassName(unsigned ClassID) const override {
2353 return Impl.getRegisterClassName(ClassID);
2354 }
2355 TypeSize getRegisterBitWidth(RegisterKind K) const override {
2356 return Impl.getRegisterBitWidth(K);
2357 }
2358 unsigned getMinVectorRegisterBitWidth() const override {
2359 return Impl.getMinVectorRegisterBitWidth();
2360 }
2361 std::optional<unsigned> getMaxVScale() const override {
2362 return Impl.getMaxVScale();
2363 }
2364 std::optional<unsigned> getVScaleForTuning() const override {
2365 return Impl.getVScaleForTuning();
2366 }
2367 bool isVScaleKnownToBeAPowerOfTwo() const override {
2368 return Impl.isVScaleKnownToBeAPowerOfTwo();
2369 }
2370 bool shouldMaximizeVectorBandwidth(
2371 TargetTransformInfo::RegisterKind K) const override {
2372 return Impl.shouldMaximizeVectorBandwidth(K);
2373 }
2374 ElementCount getMinimumVF(unsigned ElemWidth,
2375 bool IsScalable) const override {
2376 return Impl.getMinimumVF(ElemWidth, IsScalable);
2377 }
2378 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
2379 return Impl.getMaximumVF(ElemWidth, Opcode);
2380 }
2381 unsigned getStoreMinimumVF(unsigned VF, Type *ScalarMemTy,
2382 Type *ScalarValTy) const override {
2383 return Impl.getStoreMinimumVF(VF, ScalarMemTy, ScalarValTy);
2384 }
2385 bool shouldConsiderAddressTypePromotion(
2386 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
2387 return Impl.shouldConsiderAddressTypePromotion(
2388 I, AllowPromotionWithoutCommonHeader);
2389 }
2390 unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
2391 std::optional<unsigned> getCacheSize(CacheLevel Level) const override {
2392 return Impl.getCacheSize(Level);
2393 }
2394 std::optional<unsigned>
2395 getCacheAssociativity(CacheLevel Level) const override {
2396 return Impl.getCacheAssociativity(Level);
2397 }
2398
2399 /// Return the preferred prefetch distance in terms of instructions.
2400 ///
2401 unsigned getPrefetchDistance() const override {
2402 return Impl.getPrefetchDistance();
2403 }
2404
2405 /// Return the minimum stride necessary to trigger software
2406 /// prefetching.
2407 ///
2408 unsigned getMinPrefetchStride(unsigned NumMemAccesses,
2409 unsigned NumStridedMemAccesses,
2410 unsigned NumPrefetches,
2411 bool HasCall) const override {
2412 return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
2413 NumPrefetches, HasCall);
2414 }
2415
2416 /// Return the maximum prefetch distance in terms of loop
2417 /// iterations.
2418 ///
2419 unsigned getMaxPrefetchIterationsAhead() const override {
2420 return Impl.getMaxPrefetchIterationsAhead();
2421 }
2422
2423 /// \return True if prefetching should also be done for writes.
2424 bool enableWritePrefetching() const override {
2425 return Impl.enableWritePrefetching();
2426 }
2427
2428 /// \return True if the target wants to issue a prefetch in address space \p AS.
2429 bool shouldPrefetchAddressSpace(unsigned AS) const override {
2430 return Impl.shouldPrefetchAddressSpace(AS);
2431 }
2432
2433 unsigned getMaxInterleaveFactor(ElementCount VF) override {
2434 return Impl.getMaxInterleaveFactor(VF);
2435 }
2436 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
2437 unsigned &JTSize,
2438 ProfileSummaryInfo *PSI,
2439 BlockFrequencyInfo *BFI) override {
2440 return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
2441 }
2442 InstructionCost getArithmeticInstrCost(
2443 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
2444 OperandValueInfo Opd1Info, OperandValueInfo Opd2Info,
2445 ArrayRef<const Value *> Args,
2446 const Instruction *CxtI = nullptr) override {
2447 return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
2448 Args, CxtI);
2449 }
2450
2451 InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp,
2452 ArrayRef<int> Mask,
2454 VectorType *SubTp,
2455 ArrayRef<const Value *> Args) override {
2456 return Impl.getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp, Args);
2457 }
2458 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
2459 CastContextHint CCH,
2461 const Instruction *I) override {
2462 return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
2463 }
2464 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst,
2465 VectorType *VecTy,
2466 unsigned Index) override {
2467 return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
2468 }
2469 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind,
2470 const Instruction *I = nullptr) override {
2471 return Impl.getCFInstrCost(Opcode, CostKind, I);
2472 }
2473 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
2474 CmpInst::Predicate VecPred,
2476 const Instruction *I) override {
2477 return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
2478 }
2479 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val,
2481 unsigned Index, Value *Op0,
2482 Value *Op1) override {
2483 return Impl.getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
2484 }
2485 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val,
2487 unsigned Index) override {
2488 return Impl.getVectorInstrCost(I, Val, CostKind, Index);
2489 }
2490 InstructionCost
2491 getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF,
2492 const APInt &DemandedDstElts,
2493 TTI::TargetCostKind CostKind) override {
2494 return Impl.getReplicationShuffleCost(EltTy, ReplicationFactor, VF,
2495 DemandedDstElts, CostKind);
2496 }
2497 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2498 unsigned AddressSpace,
2500 OperandValueInfo OpInfo,
2501 const Instruction *I) override {
2502 return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind,
2503 OpInfo, I);
2504 }
2505 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
2506 unsigned AddressSpace,
2508 const Instruction *I) override {
2509 return Impl.getVPMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2510 CostKind, I);
2511 }
2512 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
2513 Align Alignment, unsigned AddressSpace,
2514 TTI::TargetCostKind CostKind) override {
2515 return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
2516 CostKind);
2517 }
2518 InstructionCost
2519 getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
2520 bool VariableMask, Align Alignment,
2522 const Instruction *I = nullptr) override {
2523 return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
2524 Alignment, CostKind, I);
2525 }
2526 InstructionCost getInterleavedMemoryOpCost(
2527 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
2528 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
2529 bool UseMaskForCond, bool UseMaskForGaps) override {
2530 return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
2531 Alignment, AddressSpace, CostKind,
2532 UseMaskForCond, UseMaskForGaps);
2533 }
2534 InstructionCost
2535 getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
2536 std::optional<FastMathFlags> FMF,
2537 TTI::TargetCostKind CostKind) override {
2538 return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);
2539 }
2540 InstructionCost
2541 getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned,
2542 FastMathFlags FMF,
2543 TTI::TargetCostKind CostKind) override {
2544 return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, FMF, CostKind);
2545 }
2546 InstructionCost
2547 getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, Type *ResTy,
2548 VectorType *Ty, FastMathFlags FMF,
2549 TTI::TargetCostKind CostKind) override {
2550 return Impl.getExtendedReductionCost(Opcode, IsUnsigned, ResTy, Ty, FMF,
2551 CostKind);
2552 }
2553 InstructionCost
2554 getMulAccReductionCost(bool IsUnsigned, Type *ResTy, VectorType *Ty,
2555 TTI::TargetCostKind CostKind) override {
2556 return Impl.getMulAccReductionCost(IsUnsigned, ResTy, Ty, CostKind);
2557 }
2558 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
2559 TTI::TargetCostKind CostKind) override {
2560 return Impl.getIntrinsicInstrCost(ICA, CostKind);
2561 }
2562 InstructionCost getCallInstrCost(Function *F, Type *RetTy,
2563 ArrayRef<Type *> Tys,
2564 TTI::TargetCostKind CostKind) override {
2565 return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
2566 }
2567 unsigned getNumberOfParts(Type *Tp) override {
2568 return Impl.getNumberOfParts(Tp);
2569 }
2570 InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
2571 const SCEV *Ptr) override {
2572 return Impl.getAddressComputationCost(Ty, SE, Ptr);
2573 }
2574 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
2575 return Impl.getCostOfKeepingLiveOverCall(Tys);
2576 }
2577 bool getTgtMemIntrinsic(IntrinsicInst *Inst,
2578 MemIntrinsicInfo &Info) override {
2579 return Impl.getTgtMemIntrinsic(Inst, Info);
2580 }
2581 unsigned getAtomicMemIntrinsicMaxElementSize() const override {
2582 return Impl.getAtomicMemIntrinsicMaxElementSize();
2583 }
2584 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
2585 Type *ExpectedType) override {
2586 return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
2587 }
2588 Type *getMemcpyLoopLoweringType(
2589 LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
2590 unsigned DestAddrSpace, unsigned SrcAlign, unsigned DestAlign,
2591 std::optional<uint32_t> AtomicElementSize) const override {
2592 return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
2593 DestAddrSpace, SrcAlign, DestAlign,
2594 AtomicElementSize);
2595 }
2596 void getMemcpyLoopResidualLoweringType(
2597 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
2598 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
2599 unsigned SrcAlign, unsigned DestAlign,
2600 std::optional<uint32_t> AtomicCpySize) const override {
2601 Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
2602 SrcAddrSpace, DestAddrSpace,
2603 SrcAlign, DestAlign, AtomicCpySize);
2604 }
2605 bool areInlineCompatible(const Function *Caller,
2606 const Function *Callee) const override {
2607 return Impl.areInlineCompatible(Caller, Callee);
2608 }
2609 bool areTypesABICompatible(const Function *Caller, const Function *Callee,
2610 const ArrayRef<Type *> &Types) const override {
2611 return Impl.areTypesABICompatible(Caller, Callee, Types);
2612 }
2613 bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
2614 return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
2615 }
2616 bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
2617 return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
2618 }
2619 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
2620 return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
2621 }
2622 bool isLegalToVectorizeLoad(LoadInst *LI) const override {
2623 return Impl.isLegalToVectorizeLoad(LI);
2624 }
2625 bool isLegalToVectorizeStore(StoreInst *SI) const override {
2626 return Impl.isLegalToVectorizeStore(SI);
2627 }
2628 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
2629 unsigned AddrSpace) const override {
2630 return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
2631 AddrSpace);
2632 }
2633 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
2634 unsigned AddrSpace) const override {
2635 return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
2636 AddrSpace);
2637 }
2638 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
2639 ElementCount VF) const override {
2640 return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
2641 }
2642 bool isElementTypeLegalForScalableVector(Type *Ty) const override {
2643 return Impl.isElementTypeLegalForScalableVector(Ty);
2644 }
2645 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
2646 unsigned ChainSizeInBytes,
2647 VectorType *VecTy) const override {
2648 return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
2649 }
2650 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
2651 unsigned ChainSizeInBytes,
2652 VectorType *VecTy) const override {
2653 return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
2654 }
2655 bool preferInLoopReduction(unsigned Opcode, Type *Ty,
2656 ReductionFlags Flags) const override {
2657 return Impl.preferInLoopReduction(Opcode, Ty, Flags);
2658 }
2659 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
2660 ReductionFlags Flags) const override {
2661 return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
2662 }
2663 bool preferEpilogueVectorization() const override {
2664 return Impl.preferEpilogueVectorization();
2665 }
2666
2667 bool shouldExpandReduction(const IntrinsicInst *II) const override {
2668 return Impl.shouldExpandReduction(II);
2669 }
2670
2671 unsigned getGISelRematGlobalCost() const override {
2672 return Impl.getGISelRematGlobalCost();
2673 }
2674
2675 unsigned getMinTripCountTailFoldingThreshold() const override {
2676 return Impl.getMinTripCountTailFoldingThreshold();
2677 }
2678
2679 bool supportsScalableVectors() const override {
2680 return Impl.supportsScalableVectors();
2681 }
2682
2683 bool enableScalableVectorization() const override {
2684 return Impl.enableScalableVectorization();
2685 }
2686
2687 bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
2688 Align Alignment) const override {
2689 return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
2690 }
2691
2692 VPLegalization
2693 getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
2694 return Impl.getVPLegalizationStrategy(PI);
2695 }
2696
2697 bool hasArmWideBranch(bool Thumb) const override {
2698 return Impl.hasArmWideBranch(Thumb);
2699 }
2700
2701 unsigned getMaxNumArgs() const override {
2702 return Impl.getMaxNumArgs();
2703 }
2704};
2705
2706template <typename T>
2707TargetTransformInfo::TargetTransformInfo(T Impl)
2708 : TTIImpl(new Model<T>(Impl)) {}
2709
2710/// Analysis pass providing the \c TargetTransformInfo.
2711///
2712/// The core idea of the TargetIRAnalysis is to expose an interface through
2713/// which LLVM targets can analyze and provide information about the middle
2714/// end's target-independent IR. This supports use cases such as target-aware
2715/// cost modeling of IR constructs.
2716///
2717/// This is a function analysis because much of the cost modeling for targets
2718/// is done in a subtarget specific way and LLVM supports compiling different
2719/// functions targeting different subtargets in order to support runtime
2720/// dispatch according to the observed subtarget.
2721class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
2722public:
2723 typedef TargetTransformInfo Result;
2724
2725 /// Default construct a target IR analysis.
2726 ///
2727 /// This will use the module's datalayout to construct a baseline
2728 /// conservative TTI result.
2729 TargetIRAnalysis();
2730
2731 /// Construct an IR analysis pass around a target-provide callback.
2732 ///
2733 /// The callback will be called with a particular function for which the TTI
2734 /// is needed and must return a TTI object for that function.
2735 TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);
2736
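A hedged sketch of wiring a callback-based TargetIRAnalysis into a FunctionAnalysisManager. In a real compiler the callback normally comes from the TargetMachine; the DataLayout-only TTI built here is the conservative baseline and is used purely for illustration.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

static void registerBaselineTTI(FunctionAnalysisManager &FAM) {
  FAM.registerPass([] {
    return TargetIRAnalysis([](const Function &F) {
      // Conservative, target-independent result built from the DataLayout.
      return TargetTransformInfo(F.getParent()->getDataLayout());
    });
  });
}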
2737 // Value semantics. We spell out the constructors for MSVC.
2738 TargetIRAnalysis(const TargetIRAnalysis &Arg)
2739 : TTICallback(Arg.TTICallback) {}
2740 TargetIRAnalysis(TargetIRAnalysis &&Arg)
2741 : TTICallback(std::move(Arg.TTICallback)) {}
2742 TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
2743 TTICallback = RHS.TTICallback;
2744 return *this;
2745 }
2746 TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
2747 TTICallback = std::move(RHS.TTICallback);
2748 return *this;
2749 }
2750
2751 Result run(const Function &F, FunctionAnalysisManager &);
2752
2753private:
2754 friend AnalysisInfoMixin<TargetIRAnalysis>;
2755 static AnalysisKey Key;
2756
2757 /// The callback used to produce a result.
2758 ///
2759 /// We use a completely opaque callback so that targets can provide whatever
2760 /// mechanism they desire for constructing the TTI for a given function.
2761 ///
2762 /// FIXME: Should we really use std::function? It's relatively inefficient.
2763 /// It might be possible to arrange for even stateful callbacks to outlive
2764 /// the analysis and thus use a function_ref which would be lighter weight.
2765 /// This may also be less error prone as the callback is likely to reference
2766 /// the external TargetMachine, and that reference needs to never dangle.
2767 std::function<Result(const Function &)> TTICallback;
2768
2769 /// Helper function used as the callback in the default constructor.
2770 static Result getDefaultTTI(const Function &F);
2771};
2772
2773/// Wrapper pass for TargetTransformInfo.
2774///
2775/// This pass can be constructed from a TTI object which it stores internally
2776/// and is queried by passes.
2777class TargetTransformInfoWrapperPass : public ImmutablePass {
2778 TargetIRAnalysis TIRA;
2779 std::optional<TargetTransformInfo> TTI;
2780
2781 virtual void anchor();
2782
2783public:
2784 static char ID;
2785
2786 /// We must provide a default constructor for the pass but it should
2787 /// never be used.
2788 ///
2789 /// Use the constructor below or call one of the creation routines.
2790 TargetTransformInfoWrapperPass();
2791
2792 explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2793
2794 TargetTransformInfo &getTTI(const Function &F);
2795};
2796
2797/// Create an analysis pass wrapper around a TTI object.
2798///
2799/// This analysis pass just holds the TTI instance and makes it available to
2800/// clients.
2801ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
2802
2803} // namespace llvm
2804
2805#endif